rbbt-util 1.2.1 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
@@ -80,7 +80,7 @@ module Open
80
80
  # Cache
81
81
 
82
82
  def self.in_cache(url, options = {})
83
- digest = Digest::MD5.hexdigest(url)
83
+ digest = Digest::MD5.hexdigest([url, options["--post-data"]].inspect)
84
84
 
85
85
  filename = File.join(REMOTE_CACHEDIR, digest)
86
86
  if File.exists? filename
@@ -91,7 +91,7 @@ module Open
91
91
  end
92
92
 
93
93
  def self.add_cache(url, data, options = {})
94
- digest = Digest::MD5.hexdigest(url)
94
+ digest = Digest::MD5.hexdigest([url, options["--post-data"]].inspect)
95
95
  Misc.sensiblewrite(File.join(REMOTE_CACHEDIR, digest), data)
96
96
  end
97
97
 
@@ -163,19 +163,28 @@ module Open
163
163
  when (not remote?(url))
164
164
  file_open(url, options[:grep])
165
165
  when options[:nocache]
166
+ # What about grep?
166
167
  wget(url, wget_options)
167
- when in_cache(url)
168
- file_open(in_cache(url), options[:grep])
168
+ when in_cache(url, wget_options)
169
+ file_open(in_cache(url, wget_options), options[:grep])
169
170
  else
170
171
  io = wget(url, wget_options)
171
- add_cache(url, io)
172
+ add_cache(url, io, wget_options)
172
173
  io.close
173
- file_open(in_cache(url), options[:grep])
174
+ file_open(in_cache(url, wget_options), options[:grep])
174
175
  end
175
- io = unzip(io) if (zip? url and not options[:noz]) or options[:zip]
176
- io = gunzip(io) if (gzip? url and not options[:noz]) or options[:gzip]
176
+ io = unzip(io) if (zip?(url) and not options[:noz]) or options[:zip]
177
+ io = gunzip(io) if (gzip?(url) and not options[:noz]) or options[:gzip]
177
178
 
178
- io
179
+ if block_given?
180
+ yield io
181
+ else
182
+ io
183
+ end
184
+ end
185
+
186
+ def self.can_open?(file)
187
+ String === file and (File.exists?(file) or remote?(file))
179
188
  end
180
189
 
181
190
  def self.read(file, options = {}, &block)
@@ -0,0 +1,152 @@
1
+ require 'rbbt/util/misc'
2
+ require 'rbbt/util/tsv'
3
+
4
+ module Path
5
+ attr_accessor :pkg_module, :datadir
6
+
7
+ def self.find_files_back_to(path, target, subdir)
8
+ return [] if path.nil?
9
+ raise "Path #{ path } not in directory #{ subdir }" if not Misc.in_directory? path, subdir
10
+
11
+ pkg_module = path.pkg_module
12
+
13
+ files = []
14
+ while path != subdir
15
+ path = File.dirname(path)
16
+ path.extend Path
17
+ path.pkg_module = pkg_module
18
+ path.datadir = path.datadir
19
+ if path[target].exists?
20
+ files << path[target]
21
+ end
22
+ end
23
+
24
+ files
25
+ end
26
+
27
+ def self.path(string, datadir = nil, pkg_module = nil)
28
+ string.extend Path
29
+ string.datadir = datadir
30
+ string.pkg_module = case
31
+ when pkg_module.nil?
32
+ nil
33
+ when String === pkg_module
34
+ Misc.string2const pkg_module
35
+ else
36
+ pkg_module
37
+ end
38
+ string
39
+ end
40
+
41
+ def method_missing(name, *args, &block)
42
+ new = File.join(self.dup, name.to_s)
43
+ new.extend Path
44
+ new.pkg_module = pkg_module
45
+ new.datadir = datadir
46
+ new
47
+ end
48
+
49
+ def [](name)
50
+ new = File.join(self.dup, name.to_s)
51
+ new.extend Path
52
+ new.pkg_module = pkg_module
53
+ new.datadir = datadir
54
+ new
55
+ end
56
+
57
+ def namespace
58
+ return nil if self.nil? or self.empty? or (not datadir.nil? and (self == datadir or File.dirname(self) == datadir))
59
+ if File.directory? self
60
+ File.basename(self)
61
+ else
62
+ File.basename(File.dirname(self))
63
+ end
64
+ end
65
+
66
+ def identifier_files
67
+ if datadir.nil?
68
+ path = File.join(File.dirname(self), 'identifiers')
69
+ path.extend Path
70
+ path.pkg_module = pkg_module
71
+ if path.exists?
72
+ [path]
73
+ else
74
+ []
75
+ end
76
+ else
77
+ identifier_files = Path.find_files_back_to(self, 'identifiers', datadir)
78
+ return identifier_files.collect{|f| Path.path(f, datadir, pkg_module)}
79
+ end
80
+ end
81
+
82
+ def tsv(key = nil, options = {})
83
+ if options.empty? and Hash === key
84
+ options, key = key, nil
85
+ end
86
+
87
+ produce
88
+ TSV.new self, key, options.merge(:datadir => datadir)
89
+ end
90
+
91
+ def index(options = {})
92
+ produce
93
+ TSV.index self, options
94
+ end
95
+
96
+ def open(options = {})
97
+ produce
98
+ Open.open(self, options)
99
+ end
100
+
101
+ def read(options = {})
102
+ produce
103
+ Open.read(self, options)
104
+ end
105
+
106
+ def fields(sep = nil, header_hash = nil)
107
+ produce
108
+ TSV.parse_header(self.open, sep, header_hash)[1].collect{|f| f.extend TSV::Field; f.namespace = namespace ;f}
109
+ end
110
+
111
+ def all_fields(sep = nil, header_hash = nil)
112
+ produce
113
+ key_field, fields = TSV.parse_header(self.open, sep, header_hash).values_at(0, 1).flatten.collect{|f| f.extend TSV::Field; f.namespace = namespace; f}
114
+ end
115
+
116
+ def fields_in_namespace(sep = nil, header_hash = nil)
117
+ produce
118
+ TSV.parse_header(self.open, sep, header_hash)[1].collect{|f| f.extend TSV::Field; f.namespace = namespace ;f}.select{|f| f.namespace == namespace}
119
+ end
120
+
121
+ def all_namespace_fields(sep = nil, header_hash = nil)
122
+ produce
123
+ key_field, fields = TSV.parse_header(self.open, sep, header_hash).values_at(0, 1).flatten.collect{|f| f.extend TSV::Field; f.namespace = namespace; f}.select{|f| f.namespace == namespace}
124
+ end
125
+
126
+
127
+ def filename
128
+ self.to_s
129
+ end
130
+
131
+ def exists?
132
+ begin
133
+ produce
134
+ true
135
+ rescue
136
+ false
137
+ end
138
+ end
139
+
140
+ def produce
141
+ return if File.exists? self
142
+
143
+ Log.debug("Trying to produce '#{ self }'")
144
+ file, producer = pkg_module.reclaim self
145
+
146
+ raise "File #{self} has not been claimed, cannot produce" if file.nil? or producer.nil?
147
+
148
+ pkg_module.produce(self, producer[:get], producer[:subdir], producer[:sharedir])
149
+ end
150
+ end
151
+
152
+
@@ -1,3 +1,4 @@
1
+ require 'rbbt/util/tsv'
1
2
  require 'rbbt/util/misc'
2
3
  require 'rbbt/util/open'
3
4
  require 'yaml'
@@ -19,91 +20,297 @@ module Persistence
19
20
  end
20
21
 
21
22
  def self.get_persistence_file(file, prefix, options = {})
22
- File.join(CACHEDIR, prefix.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
23
+ name = prefix.to_s << ":" << file.to_s << ":"
24
+ File.join(CACHEDIR, name.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
23
25
  end
24
26
 
25
- def self.persist(file, prefix = "", persistence_type = :string, options = {})
26
- options = Misc.add_defaults options, :persistence => true
27
+ def self.get_filename(file)
28
+ case
29
+ when (String === file and File.exists? file)
30
+ File.expand_path file
31
+ when File === file
32
+ File.expand_path file.path
33
+ when Object::TSV === file
34
+ file.filename
35
+ when String === file
36
+ file
37
+ else
38
+ file.class.to_s
39
+ end
40
+ end
41
+
42
+ def self.persist_string(file, prefix = "", options = {})
43
+ options =
44
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
45
+ persistence_update, persistence_file, filename =
46
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
47
+
48
+ filename ||= get_filename(file)
49
+ persistence_file ||= get_persistence_file(filename, prefix, options)
50
+
51
+ if persistence_update or not File.exists? persistence_file
52
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
53
+
54
+ res = yield file, options, filename, persistence_file
55
+ Open.write(persistence_file, res.to_s)
56
+ res
57
+ else
58
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
59
+
60
+ Open.read(persistence_file)
61
+ end
62
+ end
63
+
64
+ def self.persist_marshal(file, prefix = "", options = {})
65
+ options =
66
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
67
+ persistence_update, persistence_file, filename =
68
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
69
+
70
+ filename ||= get_filename(file)
71
+ persistence_file ||= get_persistence_file(filename, prefix, options)
72
+
73
+ if persistence_update or not File.exists? persistence_file
74
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
75
+ res = yield file, options
76
+ Open.write(persistence_file, Marshal.dump(res))
77
+ res
78
+ else
79
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
80
+ Marshal.load(Open.open(persistence_file))
81
+ end
82
+ end
83
+
84
+ def self.persist_yaml(file, prefix = "", options = {})
85
+ options =
86
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
87
+ persistence_update, persistence_file, filename =
88
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
89
+
90
+ filename ||= get_filename(file)
91
+ persistence_file ||= get_persistence_file(filename, prefix, options)
92
+
93
+ if persistence_update or not File.exists? persistence_file
94
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
95
+ res = yield file, options, filename, persistence_file
96
+ Open.write(persistence_file, YAML.dump(res))
97
+ res
98
+ else
99
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
100
+ YAML.load(Open.open(persistence_file))
101
+ end
102
+ end
103
+
104
+ def self.persist_tsv_string(file, prefix = "", options = {})
105
+ options =
106
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
107
+ persistence_update, persistence_file, filename =
108
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
109
+
110
+ filename ||= get_filename(file)
111
+ persistence_file ||= get_persistence_file(filename, prefix, options)
112
+
113
+ if persistence_update or not File.exists? persistence_file
114
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
115
+ res = yield file, options, filename, persistence_file
116
+ Open.write(persistence_file, res.to_s)
117
+ res
118
+ else
119
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
120
+ tsv = Object::TSV.new persistence_file
121
+ tsv.filename = filename
122
+ tsv
123
+ end
124
+ end
125
+
126
+ def self.tsv_serializer(data)
127
+ case
128
+ when (not Object::TSV === data)
129
+ :marshal
130
+ when data.type == :double
131
+ :double
132
+ when data.type == :single
133
+ :single
134
+ else
135
+ :list
136
+ end
137
+ end
138
+
139
+ def self.persist_tsv(file, prefix = "", options = {})
140
+ options =
141
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
142
+ persistence_update, persistence_file, filename =
143
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
27
144
 
28
- persistence, persistence_file =
29
- Misc.process_options options, :persistence, :persistence_file
30
-
31
- filename = Misc.process_options options, :filename
32
- filename ||= case
33
- when (String === file and File.exists? file)
34
- File.expand_path file
35
- when File === file
36
- File.expand_path file.path
37
- when TSV === file
38
- file.filename
39
- else
40
- Digest::MD5.hexdigest(file.inspect)
41
- end
42
-
43
- if persistence
44
- persistence_file ||= get_persistence_file(filename, prefix, options)
45
-
46
- #{{{ CREATE
47
- if ! File.exists? persistence_file
48
- Log.low "Creating Persistence #{ persistence_file } for #{ filename }"
49
- res = yield file, options, filename, persistence_file
50
- if Array === res and res.length == 2 and (Hash === res[1] or res[1].nil?)
51
- data, extra = res
52
- else
53
- data, extra = [res, nil]
145
+ filename ||= get_filename(file)
146
+ persistence_file ||= get_persistence_file(filename, prefix, options)
147
+
148
+ if persistence_update or not File.exists? persistence_file
149
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
150
+
151
+ res = yield file, options, filename, persistence_file
152
+ serializer = tsv_serializer res
153
+
154
+ if File.exists? persistence_file
155
+ Log.debug "Erasing old #{ persistence_file }. Prefix = #{prefix}"
156
+ FileUtils.rm persistence_file
157
+ end
158
+
159
+ per = Persistence::TSV.get persistence_file, true, serializer
160
+
161
+ per.write
162
+ per.merge! res
163
+
164
+ Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
165
+ if res.respond_to?(key.to_sym) and per.respond_to?("#{key}=".to_sym)
166
+ per.send "#{key}=".to_sym, res.send(key.to_sym)
54
167
  end
55
- ddd data.filename
56
-
57
- case persistence_type.to_sym
58
- when :tsv
59
- if Hash === data or Object::TSV === data
60
- Log.debug "Creating #{Persistence::TSV} for #{ persistence_file }"
61
- per = Persistence::TSV.get persistence_file
62
- per.write
63
- data.each{|k,v| per[k.to_s] = v}
64
- %w(case_insensitive fields key_field type filename). each do |key|
65
- if data.respond_to? key
66
- per.send "#{key}=".to_sym, data.send(key.to_sym)
67
- else
68
- per.send "#{key}=".to_sym, extra[key.to_sym]
69
- end
70
- end
71
- per.read
72
-
73
- data = per
74
- end
75
- when :string
76
- Open.write(persistence_file, data.to_s)
77
- when :marshal
78
- Open.write(persistence_file, Marshal.dump(data))
79
- when :yaml
80
- Open.write(persistence_file, YAML.dump(data))
168
+ end
169
+
170
+ per.read
171
+
172
+ tsv = Object::TSV.new per
173
+
174
+ tsv
175
+ else
176
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
177
+
178
+ per = Persistence::TSV.get persistence_file, true, serializer
179
+ tsv = Object::TSV.new per
180
+ Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
181
+ if tsv.respond_to?(key.to_sym) and per.respond_to?(key.to_sym)
182
+ tsv.send "#{key}=".to_sym, per.send(key.to_sym)
81
183
  end
82
-
83
- return [data, extra]
184
+ end
84
185
 
85
- #{{{ LOAD
86
- else
87
- Log.low "Opening Persistence #{ persistence_file } for #{ filename }"
88
- case persistence_type.to_sym
89
- when :tsv
90
- data = Persistence::TSV.get persistence_file
91
-
92
- extra = {}
93
- %W(case_insensitive fields key_field type filename).each{|key| extra[key.to_sym] = data.send key.to_sym}
94
-
95
- return [data, extra]
96
- when :string
97
- return [Open.read(persistence_type), nil]
98
- when :marshal
99
- return [File.open(persistence_file){|f| Marshal.load(f)}, nil]
100
- when :yaml
101
- return [File.open(persistence_file){|f| YAML.load(f)}, nil]
186
+ tsv
187
+ end
188
+ end
189
+
190
+ def self.persist_tsv_extra(file, prefix = "", options = {})
191
+ options =
192
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
193
+ persistence_update, persistence_file, filename =
194
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
195
+
196
+ filename ||= get_filename(file)
197
+ persistence_file ||= get_persistence_file(filename, prefix, options)
198
+
199
+ if persistence_update or not File.exists? persistence_file
200
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
201
+ res, extra = yield file, options, filename, persistence_file
202
+ serializer = tsv_serializer res
203
+
204
+ per = Persistence::TSV.get persistence_file, true, serializer
205
+
206
+ per.write
207
+ per.merge! res
208
+ Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
209
+ if extra.include?(key.to_sym) and per.respond_to?(key.to_sym)
210
+ per.send "#{key}=".to_sym, extra[key.to_sym]
102
211
  end
212
+ end
213
+ per.read
103
214
 
215
+ [ per, extra ]
216
+ else
217
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
218
+ per = Persistence::TSV.get persistence_file, true, serializer
219
+
220
+ extra = {}
221
+ Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
222
+ if per.respond_to?(key.to_sym)
223
+ extra[key] = per.send(key.to_sym)
224
+ end
104
225
  end
226
+
227
+ [ per, extra ]
228
+ end
229
+ end
230
+
231
+ def self.persist_fwt(file, prefix = "", options = {})
232
+ options =
233
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
234
+ persistence_update, persistence_file, filename =
235
+ Misc.process_options options, :persistence_update, :persistence_file, :filename
236
+
237
+ filename ||= get_filename(file)
238
+ persistence_file ||= get_persistence_file(filename, prefix, options)
239
+
240
+ if persistence_update or not File.exists? persistence_file
241
+ Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
242
+
243
+ range = options[:range]
244
+
245
+ res = yield file, options, filename, persistence_file
246
+
247
+ if File.exists? persistence_file
248
+ Log.debug "Erasing old #{ persistence_file }. Prefix = #{prefix}"
249
+ FileUtils.rm persistence_file
250
+ end
251
+
252
+
253
+ max_length = res.collect{|k,v| k.length}.max
254
+
255
+ if range
256
+ begin
257
+ fwt = FixWidthTable.new persistence_file, max_length, true
258
+ fwt.add_range res
259
+ rescue
260
+ FileUtils.rm persistence_file
261
+ raise $!
262
+ end
263
+ else
264
+ begin
265
+ fwt = FixWidthTable.new persistence_file, max_length, false
266
+ fwt.add_point res
267
+ rescue
268
+ FileUtils.rm persistence_file
269
+ raise $!
270
+ end
271
+ end
272
+
273
+ fwt.read
274
+
275
+ fwt
105
276
  else
106
- yield file, options
277
+ Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
278
+
279
+ fwt = FixWidthTable.new persistence_file, nil, nil
280
+
281
+ fwt
282
+ end
283
+ end
284
+
285
+ def self.persist(file, prefix = "", persistence_type = :string, options = {}, &block)
286
+ options = Misc.add_defaults options, :persistence => true
287
+ persistence =
288
+ Misc.process_options options, :persistence
289
+
290
+ filename = get_filename(file)
291
+
292
+ if not persistence
293
+ Log.low "Non Persistent Loading for #{filename}. Prefix: #{prefix}"
294
+ yield file, options, filename
295
+ else
296
+ Log.low "Persistent Loading for #{filename}. Prefix: #{prefix}. Type #{persistence_type.to_s}"
297
+
298
+ case persistence_type.to_sym
299
+ when :string
300
+ persist_string(file, prefix, options, &block)
301
+ when :marshal
302
+ persist_marshal(file, prefix, options, &block)
303
+ when :yaml
304
+ persist_yaml(file, prefix, options, &block)
305
+ when :tsv
306
+ persist_tsv(file, prefix, options, &block)
307
+ when :tsv_string
308
+ persist_tsv_string(file, prefix, options, &block)
309
+ when :tsv_extra
310
+ persist_tsv_extra(file, prefix, options, &block)
311
+ when :fwt
312
+ persist_fwt(file, prefix, options, &block)
313
+ end
107
314
  end
108
315
  end
109
316
  end