rbbt-util 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt-util.rb +2 -0
- data/lib/rbbt.rb +1 -0
- data/lib/rbbt/util/R.rb +24 -0
- data/lib/rbbt/util/bed.rb +325 -0
- data/lib/rbbt/util/cmd.rb +2 -1
- data/lib/rbbt/util/data_module.rb +25 -34
- data/lib/rbbt/util/excel2tsv.rb +2 -3
- data/lib/rbbt/util/log.rb +5 -0
- data/lib/rbbt/util/misc.rb +29 -1
- data/lib/rbbt/util/open.rb +1 -0
- data/lib/rbbt/util/persistence.rb +109 -0
- data/lib/rbbt/util/pkg_data.rb +114 -62
- data/lib/rbbt/util/rake.rb +78 -0
- data/lib/rbbt/util/tc_hash.rb +7 -1
- data/lib/rbbt/util/tsv.rb +582 -153
- data/lib/rbbt/util/workflow.rb +1 -2
- data/share/lib/R/util.R +89 -0
- data/test/rbbt/util/test_R.rb +9 -0
- data/test/rbbt/util/test_bed.rb +136 -0
- data/test/rbbt/util/test_data_module.rb +10 -10
- data/test/rbbt/util/test_misc.rb +1 -0
- data/test/rbbt/util/test_persistence.rb +60 -0
- data/test/rbbt/util/test_pkg_data.rb +113 -0
- data/test/rbbt/util/test_rake.rb +54 -0
- data/test/rbbt/util/test_tsv.rb +91 -46
- data/test/rbbt/util/test_workflow.rb +5 -2
- data/test/test_helper.rb +4 -0
- data/test/test_pkg.rb +0 -10
- data/test/test_rbbt.rb +3 -48
- metadata +21 -6
data/lib/rbbt/util/excel2tsv.rb
CHANGED
data/lib/rbbt/util/log.rb
CHANGED
data/lib/rbbt/util/misc.rb
CHANGED
@@ -80,6 +80,14 @@ module Misc
|
|
80
80
|
new_options
|
81
81
|
end
|
82
82
|
|
83
|
+
# Removes the options named by +keys+ from +hash+ and returns their values.
#
# Each key is looked up first as a Symbol and then as a String, so the
# options hash may be keyed either way. +hash+ is modified in place.
#
# hash - Hash of options to extract from (mutated).
# keys - one or more option names responding to to_sym and to_s.
#
# Returns the bare value when exactly one key is given; otherwise an Array
# with one entry per key, in order (nil for options that are not present).
def self.process_options(hash, *keys)
  values = keys.collect do |key|
    # Test for presence rather than truthiness, so that an explicit `false`
    # stored under the Symbol key is returned instead of being skipped.
    if hash.key?(key.to_sym)
      hash.delete(key.to_sym)
    else
      hash.delete(key.to_s)
    end
  end

  keys.length == 1 ? values.first : values
end
|
90
|
+
|
83
91
|
def self.string2hash(string)
|
84
92
|
|
85
93
|
options = {}
|
@@ -91,7 +99,7 @@ module Misc
|
|
91
99
|
end
|
92
100
|
|
93
101
|
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
94
|
-
|
102
|
+
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
95
103
|
|
96
104
|
if value == true
|
97
105
|
options[option] = option.to_s.chars.first != '!'
|
@@ -129,10 +137,18 @@ module Misc
|
|
129
137
|
fields.each_with_index{|f,i| return i if f =~ field_re}
|
130
138
|
raise FieldNotFoundError, "Field '#{ field }' was not found" unless quiet
|
131
139
|
end
|
140
|
+
|
141
|
+
# Nil-tolerant accessor: returns the first element of +list+, or nil when
# +list+ itself is nil.
def self.first(list)
  list.nil? ? nil : list.first
end
|
145
|
+
|
132
146
|
end
|
133
147
|
|
134
148
|
module PDF2Text
|
135
149
|
def self.pdf2text(filename)
|
150
|
+
require 'rbbt/util/cmd'
|
151
|
+
require 'rbbt/util/tmpfile'
|
136
152
|
TmpFile.with_file(Open.read(filename)) do |pdf|
|
137
153
|
CMD.cmd("pdftotext #{pdf} -", :pipe => false, :stderr => true)
|
138
154
|
end
|
@@ -169,8 +185,20 @@ class NamedArray < Array
|
|
169
185
|
keys = keys.collect{|k| Misc.field_position(fields, k) }
|
170
186
|
original_values_at(*keys)
|
171
187
|
end
|
188
|
+
|
189
|
+
# Transposes the receiver: treats each element as a list of values and
# returns an Array whose i-th entry groups the i-th value of every element.
#
# When +fields+ is set, each resulting entry is passed through
# NamedArray.name together with +fields+.
#
# Returns an empty Array for an empty receiver (there is nothing to zip).
def zip_fields
  return [] if empty?

  zipped = self[0].zip(*self[1..-1])
  zipped = zipped.collect { |values| NamedArray.name(values, fields) } if fields
  zipped
end
|
172
194
|
end
|
173
195
|
|
196
|
+
# Runs the given block once under Benchmark.measure and prints the
# resulting timing report to standard output.
def benchmark
  require 'benchmark'
  timing = Benchmark.measure { yield }
  puts(timing)
end
|
174
202
|
|
175
203
|
def profile
|
176
204
|
require 'ruby-prof'
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -0,0 +1,109 @@
|
|
1
|
+
require 'rbbt/util/misc'
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
# On-disk cache for the result of expensive blocks.
#
# Persistence.persist runs a block once, stores its result in a file under
# the cache directory, and serves later calls from that file.
module Persistence
  require 'digest/md5'   # Digest::MD5 is used to build cache file names
  require 'fileutils'    # FileUtils is used to create the cache directory
  require 'rbbt/util/tc_hash'
  TSV = TCHash

  CACHEDIR="/tmp/tsv_persistent_cache"
  FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR

  # Attributes copied between a TSV-like object and its persisted form.
  TSV_METADATA = %w(case_insensitive fields key_field type filename).freeze

  # Points the cache at +cachedir+, creating the directory if needed.
  # The CACHEDIR string is updated in place.
  def self.cachedir=(cachedir)
    CACHEDIR.replace cachedir
    FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
  end

  # Returns the current cache directory.
  def self.cachedir
    CACHEDIR
  end

  # Builds the cache file path for +file+: the sanitized +prefix+ (whitespace
  # becomes '_', '/' becomes '>') followed by the MD5 of [file, options].inspect.
  def self.get_persistence_file(file, prefix, options = {})
    File.join(CACHEDIR, prefix.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
  end

  # Runs the block for +file+, caching its result.
  #
  # file             - the source: a path String, a File, a TSV, or any object.
  # prefix           - label prepended to the cache file name.
  # persistence_type - :tsv, :string, :marshal or :yaml.
  # options          - Hash; :persistence (default true), :persistence_file
  #                    and :filename are consumed here, the rest is passed on
  #                    to the block and takes part in the cache file name.
  #
  # With persistence enabled the block is yielded
  # (file, options, filename, persistence_file) only when the cache file does
  # not exist yet. It may return either the data, or a [data, extra] pair
  # where extra is a Hash or nil. The method then returns [data, extra].
  #
  # With persistence disabled the block is yielded (file, options) and its
  # result is returned unchanged.
  def self.persist(file, prefix = "", persistence_type = :string, options = {})
    options = Misc.add_defaults options, :persistence => true

    persistence, persistence_file =
      Misc.process_options options, :persistence, :persistence_file

    filename = Misc.process_options options, :filename
    filename ||= case
                 when (String === file && File.exist?(file))
                   File.expand_path file
                 when File === file
                   File.expand_path file.path
                 when TSV === file
                   file.filename
                 else
                   # No usable path: identify the source by its content.
                   Digest::MD5.hexdigest(file.inspect)
                 end

    return yield(file, options) unless persistence

    persistence_file ||= get_persistence_file(filename, prefix, options)

    #{{{ CREATE
    unless File.exist? persistence_file
      Log.low "Creating Persistence #{ persistence_file } for #{ filename }"
      res = yield file, options, filename, persistence_file

      # A two-element Array whose second element is a Hash or nil is taken
      # to be a [data, extra] pair; anything else is plain data.
      if Array === res && res.length == 2 && (Hash === res[1] || res[1].nil?)
        data, extra = res
      else
        data, extra = res, nil
      end

      case persistence_type.to_sym
      when :tsv
        if Hash === data || Object::TSV === data
          Log.debug "Creating #{Persistence::TSV} for #{ persistence_file }"
          per = Persistence::TSV.get persistence_file
          per.write
          data.each{|k,v| per[k.to_s] = v}
          TSV_METADATA.each do |key|
            value = if data.respond_to? key
                      data.send(key.to_sym)
                    else
                      # extra is nil when the block returned bare data.
                      extra.nil? ? nil : extra[key.to_sym]
                    end
            per.send "#{key}=".to_sym, value
          end
          per.read

          data = per
        end
      when :string
        Open.write(persistence_file, data.to_s)
      when :marshal
        Open.write(persistence_file, Marshal.dump(data))
      when :yaml
        Open.write(persistence_file, YAML.dump(data))
      end

      return [data, extra]
    end

    #{{{ LOAD
    Log.low "Opening Persistence #{ persistence_file } for #{ filename }"
    case persistence_type.to_sym
    when :tsv
      data = Persistence::TSV.get persistence_file

      extra = {}
      TSV_METADATA.each{|key| extra[key.to_sym] = data.send key.to_sym}

      return [data, extra]
    when :string
      # Read the cache file itself (not the persistence type symbol).
      return [Open.read(persistence_file), nil]
    when :marshal
      return [File.open(persistence_file){|f| Marshal.load(f)}, nil]
    when :yaml
      return [File.open(persistence_file){|f| YAML.load(f)}, nil]
    end
  end
end
|
data/lib/rbbt/util/pkg_data.rb
CHANGED
@@ -1,10 +1,60 @@
|
|
1
1
|
require 'rbbt/util/open'
|
2
2
|
require 'rbbt/util/tsv'
|
3
3
|
require 'rbbt/util/log'
|
4
|
-
require 'rake'
|
4
|
+
require 'rbbt/util/rake'
|
5
5
|
|
6
6
|
module PKGData
|
7
|
-
|
7
|
+
attr_accessor :claims
|
8
|
+
# Hook invoked when an object extends PKGData: starts the extender off with
# an empty claims Hash, assigned through its `claims=` accessor.
def self.extended(base)
|
9
|
+
base.claims = {}
|
10
|
+
end
|
11
|
+
|
12
|
+
module Path
|
13
|
+
attr_accessor :base
|
14
|
+
|
15
|
+
def method_missing(name, *args, &block)
|
16
|
+
new = File.join(self.dup, name.to_s)
|
17
|
+
new.extend Path
|
18
|
+
new.base = base
|
19
|
+
new
|
20
|
+
end
|
21
|
+
|
22
|
+
def [](name)
|
23
|
+
new = File.join(self.dup, name.to_s)
|
24
|
+
new.extend Path
|
25
|
+
new.base = base
|
26
|
+
new
|
27
|
+
end
|
28
|
+
|
29
|
+
def tsv(options = {})
|
30
|
+
produce
|
31
|
+
TSV.new self, options
|
32
|
+
end
|
33
|
+
|
34
|
+
def index(field = nil, other = nil, options = {})
|
35
|
+
produce
|
36
|
+
TSV.index self, options.merge(:target => field, :others => other)
|
37
|
+
end
|
38
|
+
|
39
|
+
def open
|
40
|
+
produce
|
41
|
+
Open.open(self)
|
42
|
+
end
|
43
|
+
|
44
|
+
def read
|
45
|
+
produce
|
46
|
+
Open.read(self)
|
47
|
+
end
|
48
|
+
|
49
|
+
def produce
|
50
|
+
return if File.exists? self
|
51
|
+
|
52
|
+
Log.debug("Trying to produce '#{ self }'")
|
53
|
+
file, producer = base.reclaim self
|
54
|
+
base.produce(self, producer[:get], producer[:subdir], producer[:sharedir])
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
8
58
|
class SharedirNotFoundError < StandardError; end
|
9
59
|
|
10
60
|
def self.sharedir_for_file(file = __FILE__)
|
@@ -32,79 +82,81 @@ module PKGData
|
|
32
82
|
raise SharedirNotFoundError
|
33
83
|
end
|
34
84
|
|
35
|
-
def
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
else
|
41
|
-
task.sub!(/\/$/,'') if String === task
|
42
|
-
path = File.dirname(path)
|
43
|
-
end
|
85
|
+
# Returns a copy of datadir extended with Path whose base is this object,
# the starting point for building paths to data files.
def files
  datadir.dup.extend(Path).tap { |root| root.base = self }
end
|
44
90
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
Rake::Task[task].invoke
|
51
|
-
Rake::Task[task].reenable
|
52
|
-
ensure
|
53
|
-
FileUtils.chdir old_dir
|
91
|
+
# Tells whether +file+ is datadir itself or lies beneath it.
#
# Both paths are expanded first. The match is made on whole path components,
# so a sibling directory that merely shares a name prefix with datadir
# (e.g. "data" vs "database") does not count as inside it.
#
# Returns true or false.
def in_datadir?(file)
  root = File.expand_path(datadir)
  path = File.expand_path(file.to_s)

  path == root || path.start_with?(root.chomp(File::SEPARATOR) + File::SEPARATOR)
end
|
57
98
|
|
58
|
-
def
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
99
|
+
# Registers how a data file (or directory) can be obtained.
#
# file     - nil or :all to claim the whole subdir; a path already inside
#            datadir is used as is; anything else is placed under
#            datadir/subdir.
# get      - the source; it is stored in the claim and later handed to
#            produce.
# subdir   - optional sub-directory of datadir.
# sharedir - where packaged files live; defaults to
#            PKGData.get_caller_sharedir.
#
# The claim is stored in +claims+ under the resolved path. The file is
# produced immediately when +get+ is a TSV, or when +get+ is a String that is
# not an existing file, is not named "Rakefile", and reclaim(file) returns nil.
def claim(file, get = nil, subdir = nil, sharedir = nil)
  file = if file.nil? || file === :all
           File.join(datadir, subdir.to_s)
         elsif in_datadir?(file)
           file
         else
           File.join(datadir, subdir.to_s, file.to_s)
         end

  sharedir ||= PKGData.get_caller_sharedir
  claims[file] = {:get => get, :subdir => subdir, :sharedir => sharedir}

  produce(file, get, subdir, sharedir) if TSV === get

  # File.exist? rather than the File.exists? alias, which Ruby 3.2 removed.
  eager = String === get &&
          !File.exist?(get) &&
          reclaim(file).nil? &&
          File.basename(get.to_s) != "Rakefile"
  produce(file, get, subdir, sharedir) if eager
end
|
72
114
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
rake_task = nil
|
77
|
-
|
78
|
-
until rake_dir == install_dir
|
79
|
-
return if run_rake(path, rake_dir, rake_task)
|
80
|
-
rake_task = File.join(File.basename(rake_dir), rake_task || "")
|
81
|
-
rake_dir = File.dirname(rake_dir)
|
82
|
-
end
|
115
|
+
# Finds the claim that covers +file+.
#
# Starting at the expanded +file+ and walking up one directory at a time,
# returns the first path that has an entry in @claims, stopping when datadir
# is reached.
#
# Returns [claimed_path, claim] or nil when +file+ is outside datadir or no
# claim covers it.
def reclaim(file)
  file = File.expand_path(file.dup)
  return nil unless in_datadir? file

  root = File.expand_path(datadir)
  while file != root
    return [file, @claims[file]] if @claims[file]

    parent = File.dirname(file)
    # File.dirname is a fixed point at the filesystem root; stop there so
    # the walk always terminates even if datadir is never reached.
    break if parent == file
    file = parent
  end

  nil
end
|
91
127
|
|
92
|
-
def
|
93
|
-
|
94
|
-
|
128
|
+
# Drops the claim registered under +file+, if there is one.
# Returns the removed claim, or nil when +file+ was not claimed.
def declaim(file)
  return unless @claims.include?(file)

  @claims.delete(file)
end
|
95
131
|
|
96
|
-
|
132
|
+
# Produces +file+ by running a task from +rakefile+.
#
# The task name is +file+ (expanded) with everything up to and including
# datadir/subdir stripped, and Rake is run from datadir/subdir.
#
# rakefile - path of the Rakefile to load.
# subdir   - sub-directory of datadir; may be nil, as in claim and produce.
# file     - path of the file to produce.
def produce_with_rake(rakefile, subdir, file)
  workdir = File.join(datadir, subdir.to_s)
  task = File.expand_path(file).sub(/^.*#{Regexp.quote(workdir)}\/?/, '')
  RakeHelper.run(rakefile, task, workdir)
end
|
97
136
|
|
98
|
-
|
99
|
-
|
100
|
-
get_datafile(file.to_s, path, get, sharedir)
|
101
|
-
end
|
137
|
+
# Creates +file+ from the source described by +get+.
#
# file     - destination path; its directory is created when missing.
# get      - nil: copy the file of the same name from sharedir/subdir;
#            Proc: write the value it returns;
#            TSV: write its to_s;
#            String or Symbol named "Rakefile": run that Rakefile (a Symbol
#            is looked up in sharedir/subdir, a String in sharedir);
#            any other String: write the content read through Open.read.
# subdir   - sub-directory of datadir and sharedir; may be nil.
# sharedir - directory holding packaged files and Rakefiles.
#
# Raises RuntimeError for any other kind of +get+.
def produce(file, get, subdir, sharedir)
  Log.low "Getting data file '#{ file }' into '#{ subdir }'. Get: #{get.class}"

  FileUtils.mkdir_p File.dirname(file) unless File.exist?(File.dirname(file))

  case
  when get.nil?
    FileUtils.cp File.join(sharedir, subdir.to_s, File.basename(file.to_s)), file.to_s
  when Proc === get
    Open.write(file, get.call)
  when TSV === get
    Open.write(file, get.to_s)
  when ((String === get || Symbol === get) && File.basename(get.to_s) == "Rakefile")
    # subdir.to_s keeps a nil subdir from breaking File.join, matching the
    # get.nil? branch above.
    rakefile = if Symbol === get
                 File.join(sharedir, subdir.to_s, get.to_s)
               else
                 File.join(sharedir, get.to_s)
               end
    produce_with_rake(rakefile, subdir, file)
  when String === get
    Open.write(file, Open.read(get, :wget_options => {:pipe => true}, :nocache => true))
  else
    raise "Unknown Get: #{get.class}"
  end
end
|
110
162
|
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'rbbt/util/tsv'
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
require 'rbbt/util/log'
|
4
|
+
|
5
|
+
# Runs Rakefiles in a forked child process, so that loading them does not
# affect the current process.
module RakeHelper
  # Wraps Rake::FileTask.define_task so that the name of every task defined
  # through it is recorded, and adds Rake::FileTask.files to list them.
  # Meant to be called once, inside the forked child, before the Rakefile is
  # loaded.
  def self.track_file_tasks
    Rake::FileTask.module_eval do
      class << self
        alias_method :old_define_task, :define_task
      end

      def self.define_task(file, *args, &block)
        @@files ||= []
        # Record the name of the resulting task rather than the raw argument,
        # which is a Hash for `file "a" => ["b"]` style declarations.
        defined = old_define_task(file, *args, &block)
        @@files << defined.name
        defined
      end

      def self.files
        # Empty list when the Rakefile declared no such tasks.
        @@files ||= []
      end
    end
  end
  private_class_method :track_file_tasks

  # Lists the tasks that +rakefile+ declares through Rake::FileTask.
  #
  # rakefile - path of the Rakefile to load.
  # task     - unused; kept for signature parity with run.
  # chdir    - directory to change into before loading, or nil.
  #
  # Returns an Array of task names.
  # Raises RuntimeError when the child process fails.
  def self.files(rakefile, task = :default, chdir = nil)
    require 'rbbt/util/tmpfile'

    status = nil
    files = nil
    TmpFile.with_file do |f|
      pid = Process.fork{
        require 'rake'
        FileUtils.chdir chdir if chdir

        track_file_tasks

        load rakefile

        Open.write(f, Rake::FileTask.files * "\n")
        exit
      }

      pid, status = Process.waitpid2(pid)
      # A failed child may not have written the list; let the error below
      # report the failure instead of a missing file.
      files = Open.read(f).split("\n") if status.success?
    end
    raise "Error getting files from Rake: #{ rakefile } " unless status.success?
    files
  end

  # Runs +task+ from +rakefile+ in a child process.
  #
  # After loading the Rakefile a :default task is defined that invokes every
  # recorded file task, so the default is to build all declared files.
  #
  # rakefile - path of the Rakefile to load.
  # task     - name of the task to invoke.
  # chdir    - directory to change into before loading, or nil.
  #
  # Raises RuntimeError when the child process fails.
  def self.run(rakefile, task = :default, chdir = nil)
    pid = Process.fork{
      require 'rake'
      FileUtils.chdir chdir if chdir

      track_file_tasks

      load rakefile

      # Use the explicit API: the bare `task` DSL method is not available
      # inside a module on Rake versions that scope the DSL to Rake::DSL.
      Rake::Task.define_task(:default) do |t|
        Rake::FileTask.files.each do |file| Rake::Task[file].invoke end
      end

      Rake::Task[task].invoke
      exit
    }
    pid, status = Process.waitpid2(pid)

    raise "Error in Rake: #{ rakefile } => #{ task }" unless status.success?
  end
end
|
77
|
+
|
78
|
+
|