rbbt-util 1.1.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt-util.rb +2 -0
- data/lib/rbbt.rb +1 -0
- data/lib/rbbt/util/R.rb +24 -0
- data/lib/rbbt/util/bed.rb +325 -0
- data/lib/rbbt/util/cmd.rb +2 -1
- data/lib/rbbt/util/data_module.rb +25 -34
- data/lib/rbbt/util/excel2tsv.rb +2 -3
- data/lib/rbbt/util/log.rb +5 -0
- data/lib/rbbt/util/misc.rb +29 -1
- data/lib/rbbt/util/open.rb +1 -0
- data/lib/rbbt/util/persistence.rb +109 -0
- data/lib/rbbt/util/pkg_data.rb +114 -62
- data/lib/rbbt/util/rake.rb +78 -0
- data/lib/rbbt/util/tc_hash.rb +7 -1
- data/lib/rbbt/util/tsv.rb +582 -153
- data/lib/rbbt/util/workflow.rb +1 -2
- data/share/lib/R/util.R +89 -0
- data/test/rbbt/util/test_R.rb +9 -0
- data/test/rbbt/util/test_bed.rb +136 -0
- data/test/rbbt/util/test_data_module.rb +10 -10
- data/test/rbbt/util/test_misc.rb +1 -0
- data/test/rbbt/util/test_persistence.rb +60 -0
- data/test/rbbt/util/test_pkg_data.rb +113 -0
- data/test/rbbt/util/test_rake.rb +54 -0
- data/test/rbbt/util/test_tsv.rb +91 -46
- data/test/rbbt/util/test_workflow.rb +5 -2
- data/test/test_helper.rb +4 -0
- data/test/test_pkg.rb +0 -10
- data/test/test_rbbt.rb +3 -48
- metadata +21 -6
data/lib/rbbt/util/excel2tsv.rb
CHANGED
data/lib/rbbt/util/log.rb
CHANGED
data/lib/rbbt/util/misc.rb
CHANGED
@@ -80,6 +80,14 @@ module Misc
|
|
80
80
|
new_options
|
81
81
|
end
|
82
82
|
|
83
|
+
# Extracts (and removes) one or more options from +hash+.
#
# Every requested key is looked up first under its Symbol form and, when no
# value is found there, under its String form. The matching entry is deleted
# from +hash+, so the hash passed in is modified in place.
#
# hash - the options Hash to extract from.
# keys - one or more option names (Symbol or String).
#
# Returns the bare value when exactly one key is given, otherwise an Array
# with one value per key, in the order the keys were given. Options that are
# not present yield nil.
def self.process_options(hash, *keys)
  values = keys.collect do |key|
    value = hash.delete(key.to_sym)
    # Only fall back to the String form when the Symbol form produced
    # nothing, so an explicit +false+ stored under the Symbol is preserved.
    value.nil? ? hash.delete(key.to_s) : value
  end

  keys.length == 1 ? values.first : values
end
|
90
|
+
|
83
91
|
def self.string2hash(string)
|
84
92
|
|
85
93
|
options = {}
|
@@ -91,7 +99,7 @@ module Misc
|
|
91
99
|
end
|
92
100
|
|
93
101
|
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
94
|
-
|
102
|
+
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
95
103
|
|
96
104
|
if value == true
|
97
105
|
options[option] = option.to_s.chars.first != '!'
|
@@ -129,10 +137,18 @@ module Misc
|
|
129
137
|
fields.each_with_index{|f,i| return i if f =~ field_re}
|
130
138
|
raise FieldNotFoundError, "Field '#{ field }' was not found" unless quiet
|
131
139
|
end
|
140
|
+
|
141
|
+
# Nil-tolerant accessor for the first element of +list+.
#
# list - nil, or any object responding to +first+.
#
# Returns nil when +list+ is nil, otherwise whatever +list.first+ returns.
def self.first(list)
  list.nil? ? nil : list.first
end
|
145
|
+
|
132
146
|
end
|
133
147
|
|
134
148
|
module PDF2Text
|
135
149
|
def self.pdf2text(filename)
|
150
|
+
require 'rbbt/util/cmd'
|
151
|
+
require 'rbbt/util/tmpfile'
|
136
152
|
TmpFile.with_file(Open.read(filename)) do |pdf|
|
137
153
|
CMD.cmd("pdftotext #{pdf} -", :pipe => false, :stderr => true)
|
138
154
|
end
|
@@ -169,8 +185,20 @@ class NamedArray < Array
|
|
169
185
|
keys = keys.collect{|k| Misc.field_position(fields, k) }
|
170
186
|
original_values_at(*keys)
|
171
187
|
end
|
188
|
+
|
189
|
+
# Transposes this array of per-field value lists into a list of entries.
#
# The first element is zipped with all the remaining ones, so that the
# i-th result holds the i-th value of every field. When +fields+ is set,
# each resulting entry is passed through NamedArray.name with those fields.
#
# Returns an Array of entries; an empty receiver yields an empty Array
# (previously this raised NoMethodError because self[0] was nil).
def zip_fields
  return [] if empty?

  zipped = self[0].zip(*self[1..-1])
  zipped = zipped.collect{|v| NamedArray.name(v, fields)} if fields
  zipped
end
|
172
194
|
end
|
173
195
|
|
196
|
+
# Runs the given block once and prints the Benchmark.measure timing report
# to standard output.
#
# Returns the value of the block, so the helper can be wrapped around an
# expression without losing its result (the timing line is only printed).
def benchmark
  # Loaded lazily: only needed when somebody actually benchmarks.
  require 'benchmark'

  result = nil
  puts(Benchmark.measure { result = yield })
  result
end
|
174
202
|
|
175
203
|
def profile
|
176
204
|
require 'ruby-prof'
|
data/lib/rbbt/util/open.rb
CHANGED
data/lib/rbbt/util/persistence.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'rbbt/util/misc'
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
# Caches the result of an expensive block on disk, keyed by the source file
# and the options used to produce it.
module Persistence
  # Standard-library pieces used below; required here so the module does not
  # depend on some other file having loaded them first.
  require 'digest/md5'
  require 'fileutils'
  require 'rbbt/util/tc_hash'

  # Backend used for :tsv persistence.
  TSV = TCHash

  # Attributes copied between the produced data and its persisted form.
  TSV_ATTRIBUTES = %w(case_insensitive fields key_field type filename)

  CACHEDIR="/tmp/tsv_persistent_cache"
  FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR

  # Changes the directory where persistence files are stored, creating it
  # if needed. The CACHEDIR string is updated in place so every reference
  # to the constant sees the new location.
  def self.cachedir=(cachedir)
    CACHEDIR.replace cachedir
    FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
  end

  # Returns the directory where persistence files are stored.
  def self.cachedir
    CACHEDIR
  end

  # Builds the path of the persistence file for +file+.
  #
  # The name is +prefix+ (whitespace replaced by '_' and '/' by '>')
  # followed by the MD5 digest of the inspected [file, options] pair, so
  # different options never share a cache entry.
  def self.get_persistence_file(file, prefix, options = {})
    File.join(CACHEDIR, prefix.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
  end

  # Runs the block once and stores its result; later calls load the stored
  # result instead of running the block again.
  #
  # file             - the source the data is derived from.
  # prefix           - prefix for the name of the persistence file.
  # persistence_type - one of :tsv, :string, :marshal or :yaml.
  # options          - Hash; the keys :persistence (default true),
  #                    :persistence_file and :filename are consumed here,
  #                    the rest is passed on to the block.
  #
  # The block receives (file, options, filename, persistence_file) and may
  # return either the data alone or a [data, extra_hash] pair.
  #
  # Returns [data, extra] when persistence is enabled. When it is disabled
  # the block is called with (file, options) and its value is returned
  # untouched. An unknown persistence_type on load returns nil.
  def self.persist(file, prefix = "", persistence_type = :string, options = {})
    options = Misc.add_defaults options, :persistence => true

    persistence, persistence_file =
      Misc.process_options options, :persistence, :persistence_file

    filename = Misc.process_options options, :filename
    # NOTE(review): inside this module TSV resolves to Persistence::TSV
    # (TCHash), not to the top-level TSV class -- confirm that is intended.
    filename ||= case
                 when (String === file && File.exist?(file))
                   File.expand_path file
                 when File === file
                   File.expand_path file.path
                 when TSV === file
                   file.filename
                 else
                   Digest::MD5.hexdigest(file.inspect)
                 end

    if persistence
      persistence_file ||= get_persistence_file(filename, prefix, options)

      #{{{ CREATE
      if ! File.exist? persistence_file
        Log.low "Creating Persistence #{ persistence_file } for #{ filename }"
        res = yield file, options, filename, persistence_file
        if Array === res && res.length == 2 && (Hash === res[1] || res[1].nil?)
          data, extra = res
        else
          data, extra = [res, nil]
        end

        case persistence_type.to_sym
        when :tsv
          # Object::TSV is the top-level TSV class; it is presumably loaded
          # by the caller (this file does not require it) -- TODO confirm.
          if Hash === data || Object::TSV === data
            Log.debug "Creating #{Persistence::TSV} for #{ persistence_file }"
            per = Persistence::TSV.get persistence_file
            per.write
            data.each{|k,v| per[k.to_s] = v}

            # The block may legitimately return no extra hash at all.
            fallback = extra || {}
            TSV_ATTRIBUTES.each do |key|
              value = data.respond_to?(key) ? data.send(key.to_sym) : fallback[key.to_sym]
              per.send "#{key}=".to_sym, value
            end
            per.read

            data = per
          end
        when :string
          Open.write(persistence_file, data.to_s)
        when :marshal
          Open.write(persistence_file, Marshal.dump(data))
        when :yaml
          Open.write(persistence_file, YAML.dump(data))
        end

        return [data, extra]

      #{{{ LOAD
      else
        Log.low "Opening Persistence #{ persistence_file } for #{ filename }"
        case persistence_type.to_sym
        when :tsv
          data = Persistence::TSV.get persistence_file

          extra = {}
          TSV_ATTRIBUTES.each{|key| extra[key.to_sym] = data.send key.to_sym}

          return [data, extra]
        when :string
          return [Open.read(persistence_file), nil]
        when :marshal
          # SECURITY: Marshal.load executes whatever the cache file encodes;
          # the default cache directory lives under /tmp, so only use this
          # with a cache directory other users cannot write to.
          return [File.open(persistence_file, 'rb'){|f| Marshal.load(f)}, nil]
        when :yaml
          # SECURITY: same caveat as :marshal applies to YAML.load.
          return [File.open(persistence_file){|f| YAML.load(f)}, nil]
        end

      end
    else
      yield file, options
    end
  end
end
|
data/lib/rbbt/util/pkg_data.rb
CHANGED
@@ -1,10 +1,60 @@
|
|
1
1
|
require 'rbbt/util/open'
|
2
2
|
require 'rbbt/util/tsv'
|
3
3
|
require 'rbbt/util/log'
|
4
|
-
require 'rake'
|
4
|
+
require 'rbbt/util/rake'
|
5
5
|
|
6
6
|
module PKGData
|
7
|
-
|
7
|
+
attr_accessor :claims
|
8
|
+
def self.extended(base)
|
9
|
+
base.claims = {}
|
10
|
+
end
|
11
|
+
|
12
|
+
module Path
|
13
|
+
attr_accessor :base
|
14
|
+
|
15
|
+
def method_missing(name, *args, &block)
|
16
|
+
new = File.join(self.dup, name.to_s)
|
17
|
+
new.extend Path
|
18
|
+
new.base = base
|
19
|
+
new
|
20
|
+
end
|
21
|
+
|
22
|
+
def [](name)
|
23
|
+
new = File.join(self.dup, name.to_s)
|
24
|
+
new.extend Path
|
25
|
+
new.base = base
|
26
|
+
new
|
27
|
+
end
|
28
|
+
|
29
|
+
def tsv(options = {})
|
30
|
+
produce
|
31
|
+
TSV.new self, options
|
32
|
+
end
|
33
|
+
|
34
|
+
def index(field = nil, other = nil, options = {})
|
35
|
+
produce
|
36
|
+
TSV.index self, options.merge(:target => field, :others => other)
|
37
|
+
end
|
38
|
+
|
39
|
+
def open
|
40
|
+
produce
|
41
|
+
Open.open(self)
|
42
|
+
end
|
43
|
+
|
44
|
+
def read
|
45
|
+
produce
|
46
|
+
Open.read(self)
|
47
|
+
end
|
48
|
+
|
49
|
+
def produce
|
50
|
+
return if File.exists? self
|
51
|
+
|
52
|
+
Log.debug("Trying to produce '#{ self }'")
|
53
|
+
file, producer = base.reclaim self
|
54
|
+
base.produce(self, producer[:get], producer[:subdir], producer[:sharedir])
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
8
58
|
class SharedirNotFoundError < StandardError; end
|
9
59
|
|
10
60
|
def self.sharedir_for_file(file = __FILE__)
|
@@ -32,79 +82,81 @@ module PKGData
|
|
32
82
|
raise SharedirNotFoundError
|
33
83
|
end
|
34
84
|
|
35
|
-
def
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
else
|
41
|
-
task.sub!(/\/$/,'') if String === task
|
42
|
-
path = File.dirname(path)
|
43
|
-
end
|
85
|
+
# Entry point for navigating the data directory: returns a copy of
# +datadir+ extended with Path and bound to this object as its base.
def files
  root = datadir.dup
  root.extend Path
  root.base = self
  root
end
|
44
90
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
Rake::Task[task].invoke
|
51
|
-
Rake::Task[task].reenable
|
52
|
-
ensure
|
53
|
-
FileUtils.chdir old_dir
|
91
|
+
# Tells whether +file+ is the data directory itself or lives below it.
#
# Both paths are expanded before comparing. The comparison is done on whole
# path components, so a sibling such as "/x/database" is not mistaken for
# being inside "/x/data" (the previous prefix regexp accepted it).
#
# file - a path (anything whose #to_s is a path).
#
# Returns true or false.
def in_datadir?(file)
  root = File.expand_path(datadir)
  path = File.expand_path(file.to_s)

  path == root || path.start_with?(root.chomp("/") + "/")
end
|
57
98
|
|
58
|
-
def
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
99
|
+
# Registers how a data file (or a whole sub-directory) can be obtained.
#
# file     - nil or :all to claim the whole +subdir+ of the data directory;
#            a path already inside the data directory, used as given; or a
#            name, which is placed under datadir/subdir.
# get      - how to obtain the file; stored in the claim and handed to
#            #produce (see that method for the accepted kinds).
# subdir   - sub-directory of the data directory (and of the share dir).
# sharedir - package share directory; defaults to the caller's one as
#            reported by PKGData.get_caller_sharedir.
#
# The claim is stored in +claims+ under the resolved path. The file is
# produced right away when +get+ is a TSV. It is also produced right away
# when +get+ is a String that is not an existing file and not a Rakefile,
# provided #reclaim finds no claim for the resolved path.
def claim(file, get = nil, subdir = nil, sharedir = nil)
  file = case
         when (file.nil? || file == :all)
           File.join(datadir, subdir.to_s)
         when in_datadir?(file)
           file
         else
           File.join(datadir, subdir.to_s, file.to_s)
         end

  sharedir ||= PKGData.get_caller_sharedir
  claims[file] = {:get => get, :subdir => subdir, :sharedir => sharedir}

  produce(file, get, subdir, sharedir) if TSV === get

  # Conditions are evaluated in the original order; reclaim is only
  # consulted for Strings that are not existing files.
  eager_string = String === get &&
    !File.exist?(get) &&
    reclaim(file).nil? &&
    File.basename(get.to_s) != "Rakefile"
  produce(file, get, subdir, sharedir) if eager_string
end
|
72
114
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
rake_task = nil
|
77
|
-
|
78
|
-
until rake_dir == install_dir
|
79
|
-
return if run_rake(path, rake_dir, rake_task)
|
80
|
-
rake_task = File.join(File.basename(rake_dir), rake_task || "")
|
81
|
-
rake_dir = File.dirname(rake_dir)
|
82
|
-
end
|
115
|
+
# Finds the claim responsible for +file+.
#
# The path is expanded and then walked upwards, one directory at a time,
# until a path registered in @claims is found or the data directory is
# reached. The data directory itself is never looked up.
#
# file - path of the file to look up.
#
# Returns [claimed_path, claim] for the closest claim, or nil when the file
# is outside the data directory or nothing above it was claimed.
def reclaim(file)
  file = File.expand_path(file.to_s)
  return nil unless in_datadir? file

  root = File.expand_path(datadir)
  while file != root
    return [file, @claims[file]] if @claims[file]

    parent = File.dirname(file)
    # Reached the filesystem root without meeting the data directory:
    # stop instead of looping forever on dirname("/") == "/".
    return nil if parent == file
    file = parent
  end

  nil
end
|
91
127
|
|
92
|
-
def
|
93
|
-
|
94
|
-
|
128
|
+
# Forgets the claim registered under +file+ (the exact key used in @claims).
#
# Returns the removed claim, or nil when there was none. Hash#delete already
# returns nil for a missing key, so no membership check is needed first.
def declaim(file)
  @claims.delete file
end
|
95
131
|
|
96
|
-
|
132
|
+
# Produces +file+ by running a task of +rakefile+.
#
# The task name is the path of +file+ relative to datadir/subdir, and the
# Rakefile is run (through RakeHelper.run) with that directory as working
# directory.
#
# rakefile - path of the Rakefile to load.
# subdir   - sub-directory of the data directory; may be nil, in which case
#            the data directory itself is used (a nil used to raise
#            TypeError inside File.join).
# file     - path of the file to produce.
def produce_with_rake(rakefile, subdir, file)
  workdir = File.join(datadir, subdir.to_s)
  task = File.expand_path(file).sub(/^.*#{Regexp.quote(workdir)}\/?/, '')

  RakeHelper.run(rakefile, task, workdir)
end
|
97
136
|
|
98
|
-
|
99
|
-
|
100
|
-
get_datafile(file.to_s, path, get, sharedir)
|
101
|
-
end
|
137
|
+
# Creates +file+ according to +get+, making its parent directory first.
#
# file     - destination path.
# get      - how to obtain the content:
#            nil              - copy sharedir/subdir/<basename of file>;
#            Proc             - write the value returned by calling it;
#            TSV              - write its to_s;
#            "Rakefile" name  - run the Rakefile through #produce_with_rake.
#                               A Symbol is looked up in sharedir/subdir,
#                               a String directly in sharedir;
#            other String     - read it with Open.read (the :wget_options
#                               passed suggest a URL -- not confirmed here)
#                               and write the content.
# subdir   - sub-directory inside the data and share directories; may be
#            nil (a nil used to raise TypeError in the Symbol Rakefile case).
# sharedir - package share directory.
#
# Raises RuntimeError ("Unknown Get: <class>") for any other kind of +get+.
def produce(file, get, subdir, sharedir)
  Log.low "Getting data file '#{ file }' into '#{ subdir }'. Get: #{get.class}"

  target_dir = File.dirname(file)
  FileUtils.mkdir_p target_dir unless File.exist?(target_dir)

  rakefile_get = (String === get || Symbol === get) && File.basename(get.to_s) == "Rakefile"

  case
  when get.nil?
    FileUtils.cp File.join(sharedir, subdir.to_s, File.basename(file.to_s)), file.to_s
  when Proc === get
    Open.write(file, get.call)
  when TSV === get
    Open.write(file, get.to_s)
  when rakefile_get
    rakefile = if Symbol === get
                 File.join(sharedir, subdir.to_s, get.to_s)
               else
                 File.join(sharedir, get.to_s)
               end
    produce_with_rake(rakefile, subdir, file)
  when String === get
    Open.write(file, Open.read(get, :wget_options => {:pipe => true}, :nocache => true))
  else
    raise "Unknown Get: #{get.class}"
  end
end
|
110
162
|
end
|
data/lib/rbbt/util/rake.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'rbbt/util/tsv'
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
require 'rbbt/util/log'
|
4
|
+
|
5
|
+
# Runs Rakefiles in a forked child process, so that loading a Rakefile and
# patching Rake never affects the calling process.
module RakeHelper
  # Lists the file tasks defined by +rakefile+.
  #
  # rakefile - path of the Rakefile to load.
  # task     - unused; kept so the signature mirrors RakeHelper.run.
  # chdir    - directory to change into before loading, or nil.
  #
  # Returns an Array with the names of the file tasks, one String each.
  # Raises RuntimeError when the child process fails.
  def self.files(rakefile, task = :default, chdir = nil)
    status = nil
    files = nil
    TmpFile.with_file do |f|
      pid = Process.fork{
        require 'rake'
        FileUtils.chdir chdir if chdir

        track_file_tasks

        load rakefile

        Open.write(f, Rake::FileTask.files * "\n")
        exit
      }

      pid, status = Process.waitpid2(pid)
      # A failed child may not have written the file at all; only read it
      # on success so the error raised below is the meaningful one.
      files = Open.read(f).split("\n") if status.success?
    end
    raise "Error getting files from Rake: #{ rakefile } " unless status.success?
    files
  end

  # Runs +task+ of +rakefile+.
  #
  # A :default task is added after loading the Rakefile; it invokes every
  # file task the Rakefile defined.
  #
  # rakefile - path of the Rakefile to load.
  # task     - name of the task to invoke (default :default).
  # chdir    - directory to change into before loading, or nil.
  #
  # Raises RuntimeError when the child process fails.
  def self.run(rakefile, task = :default, chdir = nil)
    pid = Process.fork{
      require 'rake'
      FileUtils.chdir chdir if chdir

      track_file_tasks

      load rakefile

      # Defined through Rake::Task directly rather than the bare `task`
      # DSL method, which is only available here if Rake mixes its DSL
      # into every object.
      Rake::Task.define_task(:default) do |t|
        Rake::FileTask.files.each do |file| Rake::Task[file].invoke end
      end

      Rake::Task[task].invoke
      exit
    }
    pid, status = Process.waitpid2(pid)

    raise "Error in Rake: #{ rakefile } => #{ task }" unless status.success?
  end

  # Hooks Rake::FileTask.define_task so every file task defined afterwards
  # is recorded, and adds Rake::FileTask.files to read the recorded list.
  # Meant to be called inside the forked child, after requiring rake.
  def self.track_file_tasks
    class << Rake::FileTask
      alias_method :old_define_task, :define_task

      def define_task(file, *args, &block)
        Rake::FileTask.files << file
        old_define_task(file, *args, &block)
      end

      # The list is always kept on Rake::FileTask itself, so subclasses
      # inheriting this hook record into the same list, and it is empty
      # (rather than undefined) when no file task was declared.
      def files
        Rake::FileTask.instance_eval { @files ||= [] }
      end
    end
  end
  private_class_method :track_file_tasks
end
|
77
|
+
|
78
|
+
|