rbbt-util 5.11.9 → 5.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/persist.rb +1 -1
- data/lib/rbbt/tsv.rb +1 -0
- data/lib/rbbt/tsv/dumper.rb +1 -2
- data/lib/rbbt/tsv/parallel/traverse.rb +2 -2
- data/lib/rbbt/tsv/parser.rb +6 -2
- data/lib/rbbt/tsv/stream.rb +55 -0
- data/lib/rbbt/tsv/util.rb +7 -1
- data/lib/rbbt/util/misc.rb +5 -762
- data/lib/rbbt/util/misc/concurrent_stream.rb +15 -0
- data/lib/rbbt/util/misc/development.rb +122 -0
- data/lib/rbbt/util/misc/inspect.rb +3 -3
- data/lib/rbbt/util/misc/manipulation.rb +136 -0
- data/lib/rbbt/util/misc/math.rb +50 -0
- data/lib/rbbt/util/misc/objects.rb +79 -0
- data/lib/rbbt/util/misc/omics.rb +10 -0
- data/lib/rbbt/util/misc/options.rb +280 -0
- data/lib/rbbt/util/misc/pipes.rb +140 -20
- data/lib/rbbt/util/misc/system.rb +90 -0
- data/lib/rbbt/util/tar.rb +0 -7
- data/lib/rbbt/workflow/accessor.rb +3 -3
- data/lib/rbbt/workflow/step/run.rb +69 -15
- data/lib/rbbt/workflow/task.rb +7 -5
- data/test/rbbt/tsv/test_stream.rb +92 -0
- data/test/rbbt/tsv/test_util.rb +1 -3
- data/test/rbbt/util/misc/test_pipes.rb +79 -0
- data/test/rbbt/workflow/test_task.rb +1 -0
- metadata +10 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fea2c562a8e3611c8c9767589ec160a36c1b988d
|
4
|
+
data.tar.gz: 42c868b4354a14c64c9eb5ea45b4d447c3d1c757
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c85bd1f0ed345e6277daed12a41145134f81914e5d1b678db4676296dcb9a8e1ae1bc66c0b3dcc866dc9cab1ebcb80d50cb7181f4e71d755ecb721e3dffa9f55
|
7
|
+
data.tar.gz: fd38d3d2ef666f8b8b4eae6ce66b8ce45b8c5cb6a57e3bbc28c2900f239495a9f991080026e09dc7fbaca480518095e426f482afcc104d07f656835b80509bf2
|
data/lib/rbbt/persist.rb
CHANGED
data/lib/rbbt/tsv.rb
CHANGED
data/lib/rbbt/tsv/dumper.rb
CHANGED
@@ -69,8 +69,8 @@ module TSV
|
|
69
69
|
def self.traverse_io_array(io, options = {}, &block)
|
70
70
|
callback = Misc.process_options options, :callback
|
71
71
|
if callback
|
72
|
-
while
|
73
|
-
res = yield
|
72
|
+
while line = io.gets
|
73
|
+
res = yield line.strip
|
74
74
|
callback.call res
|
75
75
|
end
|
76
76
|
else
|
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -65,7 +65,7 @@ module TSV
|
|
65
65
|
end
|
66
66
|
|
67
67
|
def chop_line(line)
|
68
|
-
line.split(@sep, -1)
|
68
|
+
@sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
|
69
69
|
end
|
70
70
|
|
71
71
|
def get_values_single_from_flat(parts)
|
@@ -448,8 +448,12 @@ module TSV
|
|
448
448
|
def options
|
449
449
|
options = {}
|
450
450
|
TSV::ENTRIES.each do |entry|
|
451
|
-
|
451
|
+
if self.respond_to? entry
|
452
|
+
value = self.send(entry)
|
453
|
+
options[entry.to_sym] = value unless value.nil?
|
454
|
+
end
|
452
455
|
end
|
456
|
+
options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
|
453
457
|
IndiferentHash.setup options
|
454
458
|
end
|
455
459
|
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'rbbt/tsv/parser'
|
2
|
+
require 'rbbt/tsv/dumper'
|
3
|
+
module TSV
|
4
|
+
|
5
|
+
def self.collapse_stream(input, options = {})
|
6
|
+
options = Misc.add_defaults options, :sep => "\t"
|
7
|
+
input_stream = TSV.get_stream input
|
8
|
+
|
9
|
+
sorted_input_stream = Misc.sort_stream input_stream
|
10
|
+
|
11
|
+
parser = TSV::Parser.new sorted_input_stream, options.dup
|
12
|
+
dumper = TSV::Dumper.new parser
|
13
|
+
header = TSV.header_lines(parser.key_field, parser.fields, parser.options)
|
14
|
+
dumper.close_in
|
15
|
+
dumper.close_out
|
16
|
+
dumper.stream = Misc.collapse_stream parser.stream, parser.first_line, parser.sep, header
|
17
|
+
dumper
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.paste_streams(inputs, options = {})
|
21
|
+
options = Misc.add_defaults options, :sep => "\t", :sort => false
|
22
|
+
sort = Misc.process_options options, :sort
|
23
|
+
|
24
|
+
input_streams = []
|
25
|
+
input_lines = []
|
26
|
+
input_fields = []
|
27
|
+
input_key_fields = []
|
28
|
+
input_options = []
|
29
|
+
|
30
|
+
input_source_streams = inputs.collect do |input|
|
31
|
+
stream = TSV.get_stream input
|
32
|
+
stream = sort ? Misc.sort_stream(stream) : stream
|
33
|
+
end
|
34
|
+
|
35
|
+
input_source_streams.each do |stream|
|
36
|
+
parser = TSV::Parser.new stream, options
|
37
|
+
input_streams << parser.stream
|
38
|
+
input_lines << parser.first_line
|
39
|
+
input_fields << parser.fields
|
40
|
+
input_key_fields << parser.key_field
|
41
|
+
input_options << parser.options
|
42
|
+
end
|
43
|
+
|
44
|
+
key_field = input_key_fields.first
|
45
|
+
fields = input_fields.flatten
|
46
|
+
options = options.merge(input_options.first)
|
47
|
+
|
48
|
+
dumper = TSV::Dumper.new options.merge(:key_field => key_field, :fields => fields)
|
49
|
+
dumper.close_in
|
50
|
+
dumper.close_out
|
51
|
+
header = TSV.header_lines(key_field, fields, options)
|
52
|
+
dumper.stream = Misc.paste_streams input_streams, input_lines, options[:sep], header
|
53
|
+
dumper
|
54
|
+
end
|
55
|
+
end
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -61,6 +61,7 @@ module TSV
|
|
61
61
|
begin
|
62
62
|
TSV.open(CMD.cmd(cmd), :key_field => 1, :type => :single, :cast => :to_i)
|
63
63
|
rescue
|
64
|
+
Log.exception $!
|
64
65
|
TSV.setup({}, :type => :single, :cast => :to_i)
|
65
66
|
end
|
66
67
|
end
|
@@ -106,6 +107,11 @@ module TSV
|
|
106
107
|
when String
|
107
108
|
raise "Could not open file given by String: #{Misc.fingerprint file}" unless Open.remote?(file) or File.exists? file
|
108
109
|
Open.open(file, open_options)
|
110
|
+
when (defined? Step and Step)
|
111
|
+
stream = file.get_stream
|
112
|
+
stream || get_stream(file.join.path)
|
113
|
+
when TSV::Dumper
|
114
|
+
file.stream
|
109
115
|
else
|
110
116
|
raise "Cannot get stream from: #{file.inspect}"
|
111
117
|
end
|
@@ -134,7 +140,7 @@ module TSV
|
|
134
140
|
sep = (Hash === entry_hash and entry_hash[:sep]) ? entry_hash[:sep] : "\t"
|
135
141
|
|
136
142
|
str = ""
|
137
|
-
str << "#: " << Misc.hash2string(entry_hash) << "\n" if entry_hash and entry_hash.any?
|
143
|
+
str << "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if entry_hash and entry_hash.any?
|
138
144
|
if fields
|
139
145
|
str << "#" << key_field << sep << fields * sep << "\n"
|
140
146
|
end
|
data/lib/rbbt/util/misc.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
require 'lockfile'
|
2
|
-
require 'net/smtp'
|
3
2
|
require 'digest/md5'
|
4
3
|
require 'cgi'
|
5
4
|
require 'zlib'
|
6
5
|
require 'rubygems/package'
|
6
|
+
|
7
7
|
require 'rbbt/util/tar'
|
8
8
|
require 'rbbt/util/misc/exceptions'
|
9
9
|
require 'rbbt/util/misc/concurrent_stream'
|
@@ -15,769 +15,12 @@ require 'rbbt/util/misc/inspect'
|
|
15
15
|
require 'rbbt/util/misc/math'
|
16
16
|
require 'rbbt/util/misc/development'
|
17
17
|
require 'rbbt/util/misc/lock'
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
Misc.ordered_divide(keys, max).inject([]) do |acc,c|
|
23
|
-
new = self.values_at(*c)
|
24
|
-
new.annotate acc if new.respond_to? :annotate and acc.empty?
|
25
|
-
acc.concat(new)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
module LaterString
|
31
|
-
def to_s
|
32
|
-
yield
|
33
|
-
end
|
34
|
-
end
|
18
|
+
require 'rbbt/util/misc/options'
|
19
|
+
require 'rbbt/util/misc/system'
|
20
|
+
require 'rbbt/util/misc/objects'
|
21
|
+
require 'rbbt/util/misc/manipulation'
|
35
22
|
|
36
23
|
module Misc
|
37
|
-
|
38
|
-
def self.parse_cmd_params(str)
|
39
|
-
return str if Array === str
|
40
|
-
str.scan(/
|
41
|
-
(?:["']([^"']*?)["']) |
|
42
|
-
([^"'\s]+)
|
43
|
-
/x).flatten.compact
|
44
|
-
end
|
45
|
-
|
46
|
-
def self.pid_exists?(pid)
|
47
|
-
return false if pid.nil?
|
48
|
-
begin
|
49
|
-
Process.getpgid(pid.to_i)
|
50
|
-
true
|
51
|
-
rescue Errno::ESRCH
|
52
|
-
false
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def self.collapse_ranges(ranges)
|
57
|
-
processed = []
|
58
|
-
last = nil
|
59
|
-
final = []
|
60
|
-
ranges.sort_by{|range| range.begin }.each do |range|
|
61
|
-
rbegin = range.begin
|
62
|
-
rend = range.end
|
63
|
-
if last.nil? or rbegin > last
|
64
|
-
processed << [rbegin, rend]
|
65
|
-
last = rend
|
66
|
-
else
|
67
|
-
new_processed = []
|
68
|
-
processed.each do |pbegin,pend|
|
69
|
-
if pend < rbegin
|
70
|
-
final << [pbegin, pend]
|
71
|
-
else
|
72
|
-
eend = [rend, pend].max
|
73
|
-
new_processed << [pbegin, eend]
|
74
|
-
break
|
75
|
-
end
|
76
|
-
end
|
77
|
-
processed = new_processed
|
78
|
-
last = rend if rend > last
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
final.concat processed
|
83
|
-
final.collect{|b,e| (b..e)}
|
84
|
-
end
|
85
|
-
|
86
|
-
def self.total_length(ranges)
|
87
|
-
Misc.collapse_ranges(ranges).inject(0) do |total,range| total += range.end - range.begin + 1 end
|
88
|
-
end
|
89
|
-
|
90
|
-
def self.random_sample_in_range(total, size)
|
91
|
-
p = Set.new
|
92
|
-
|
93
|
-
if size > total / 10
|
94
|
-
template = (0..total - 1).to_a
|
95
|
-
size.times do |i|
|
96
|
-
pos = (rand * (total - i)).floor
|
97
|
-
if pos == template.length - 1
|
98
|
-
v = template.pop
|
99
|
-
else
|
100
|
-
v, n = template[pos], template[-1]
|
101
|
-
template.pop
|
102
|
-
template[pos] = n
|
103
|
-
end
|
104
|
-
p << v
|
105
|
-
end
|
106
|
-
else
|
107
|
-
size.times do
|
108
|
-
pos = nil
|
109
|
-
while pos.nil?
|
110
|
-
pos = (rand * total).floor
|
111
|
-
if p.include? pos
|
112
|
-
pos = nil
|
113
|
-
end
|
114
|
-
end
|
115
|
-
p << pos
|
116
|
-
end
|
117
|
-
end
|
118
|
-
p
|
119
|
-
end
|
120
|
-
|
121
|
-
def self.sample(ary, size, replacement = false)
|
122
|
-
if ary.respond_to? :sample
|
123
|
-
ary.sample size
|
124
|
-
else
|
125
|
-
total = ary.length
|
126
|
-
p = random_sample_in_range(total, size)
|
127
|
-
ary.values_at *p
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
|
132
|
-
def self.prepare_entity(entity, field, options = {})
|
133
|
-
return entity unless defined? Entity
|
134
|
-
return entity unless String === entity or Array === entity
|
135
|
-
options ||= {}
|
136
|
-
|
137
|
-
dup_array = options.delete :dup_array
|
138
|
-
|
139
|
-
if Annotated === field or Entity.respond_to?(:formats) and Entity.formats.include? field
|
140
|
-
params = options.dup
|
141
|
-
|
142
|
-
params[:format] ||= params.delete "format"
|
143
|
-
params.merge!(:format => field) unless params.include?(:format) and not ((f = params[:format]).nil? or (String === f and f.empty?))
|
144
|
-
|
145
|
-
mod = Entity === field ? field : Entity.formats[field]
|
146
|
-
entity = mod.setup(
|
147
|
-
((entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity) ),
|
148
|
-
params
|
149
|
-
)
|
150
|
-
end
|
151
|
-
|
152
|
-
entity
|
153
|
-
end
|
154
|
-
|
155
|
-
def self.ensembl_server(organism)
|
156
|
-
date = organism.split("/")[1]
|
157
|
-
if date.nil?
|
158
|
-
"www.ensembl.org"
|
159
|
-
else
|
160
|
-
"#{ date }.archive.ensembl.org"
|
161
|
-
end
|
162
|
-
end
|
163
|
-
|
164
|
-
|
165
|
-
def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
|
166
|
-
name1 ||= "list 1"
|
167
|
-
name2 ||= "list 2"
|
168
|
-
name3 ||= "list 3"
|
169
|
-
|
170
|
-
sizes = [list1, list2, list3, list1 & list2, list1 & list3, list2 & list3, list1 & list2 & list3].collect{|l| l.length}
|
171
|
-
|
172
|
-
total = total.length if Array === total
|
173
|
-
|
174
|
-
label = "#{name1}: #{sizes[0]} (#{name2}: #{sizes[3]}, #{name3}: #{sizes[4]})"
|
175
|
-
label << "|#{name2}: #{sizes[1]} (#{name1}: #{sizes[3]}, #{name3}: #{sizes[5]})"
|
176
|
-
label << "|#{name3}: #{sizes[2]} (#{name1}: #{sizes[4]}, #{name2}: #{sizes[5]})"
|
177
|
-
if total
|
178
|
-
label << "| INTERSECTION: #{sizes[6]} TOTAL: #{total}"
|
179
|
-
else
|
180
|
-
label << "| INTERSECTION: #{sizes[6]}"
|
181
|
-
end
|
182
|
-
|
183
|
-
max = total || sizes.max
|
184
|
-
sizes = sizes.collect{|v| (v.to_f/max * 100).to_i.to_f / 100}
|
185
|
-
url = "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{sizes * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
|
186
|
-
end
|
187
|
-
|
188
|
-
def self.consolidate(list)
|
189
|
-
list.inject(nil){|acc,e|
|
190
|
-
if acc.nil?
|
191
|
-
acc = e
|
192
|
-
else
|
193
|
-
acc.concat e
|
194
|
-
acc
|
195
|
-
end
|
196
|
-
}
|
197
|
-
end
|
198
|
-
|
199
|
-
def self.positional2hash(keys, *values)
|
200
|
-
if Hash === values.last
|
201
|
-
extra = values.pop
|
202
|
-
inputs = Misc.zip2hash(keys, values)
|
203
|
-
inputs.delete_if{|k,v| v.nil? or (String === v and v.empty?)}
|
204
|
-
inputs = Misc.add_defaults inputs, extra
|
205
|
-
inputs.delete_if{|k,v| not keys.include?(k) and not (Symbol === k ? keys.include?(k.to_s) : keys.include?(k.to_sym))}
|
206
|
-
inputs
|
207
|
-
else
|
208
|
-
Misc.zip2hash(keys, values)
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
def self.send_email(from, to, subject, message, options = {})
|
213
|
-
IndiferentHash.setup(options)
|
214
|
-
options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login
|
215
|
-
|
216
|
-
server, port, user, pass, from_alias, to_alias, auth = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth
|
217
|
-
|
218
|
-
msg = <<-END_OF_MESSAGE
|
219
|
-
From: #{from_alias} <#{from}>
|
220
|
-
To: #{to_alias} <#{to}>
|
221
|
-
Subject: #{subject}
|
222
|
-
|
223
|
-
#{message}
|
224
|
-
END_OF_MESSAGE
|
225
|
-
|
226
|
-
Net::SMTP.start(server, port, server, user, pass, auth) do |smtp|
|
227
|
-
smtp.send_message msg, from, to
|
228
|
-
end
|
229
|
-
end
|
230
|
-
|
231
|
-
def self.counts(array)
|
232
|
-
counts = {}
|
233
|
-
array.each do |e|
|
234
|
-
counts[e] ||= 0
|
235
|
-
counts[e] += 1
|
236
|
-
end
|
237
|
-
|
238
|
-
counts
|
239
|
-
end
|
240
|
-
|
241
|
-
def self.proportions(array)
|
242
|
-
total = array.length
|
243
|
-
|
244
|
-
proportions = Hash.new 0
|
245
|
-
|
246
|
-
array.each do |e|
|
247
|
-
proportions[e] += 1.0 / total
|
248
|
-
end
|
249
|
-
|
250
|
-
class << proportions; self;end.class_eval do
|
251
|
-
def to_s
|
252
|
-
sort{|a,b| a[1] == b[1] ? a[0] <=> b[0] : a[1] <=> b[1]}.collect{|k,c| "%3d\t%s" % [c, k]} * "\n"
|
253
|
-
end
|
254
|
-
end
|
255
|
-
|
256
|
-
proportions
|
257
|
-
end
|
258
|
-
|
259
|
-
|
260
|
-
def self.sorted_array_hits(a1, a2)
|
261
|
-
e1, e2 = a1.shift, a2.shift
|
262
|
-
counter = 0
|
263
|
-
match = []
|
264
|
-
while true
|
265
|
-
break if e1.nil? or e2.nil?
|
266
|
-
case e1 <=> e2
|
267
|
-
when 0
|
268
|
-
match << counter
|
269
|
-
e1, e2 = a1.shift, a2.shift
|
270
|
-
counter += 1
|
271
|
-
when -1
|
272
|
-
while not e1.nil? and e1 < e2
|
273
|
-
e1 = a1.shift
|
274
|
-
counter += 1
|
275
|
-
end
|
276
|
-
when 1
|
277
|
-
e2 = a2.shift
|
278
|
-
e2 = a2.shift while not e2.nil? and e2 < e1
|
279
|
-
end
|
280
|
-
end
|
281
|
-
match
|
282
|
-
end
|
283
|
-
|
284
|
-
def self.intersect_sorted_arrays(a1, a2)
|
285
|
-
e1, e2 = a1.shift, a2.shift
|
286
|
-
intersect = []
|
287
|
-
while true
|
288
|
-
break if e1.nil? or e2.nil?
|
289
|
-
case e1 <=> e2
|
290
|
-
when 0
|
291
|
-
intersect << e1
|
292
|
-
e1, e2 = a1.shift, a2.shift
|
293
|
-
when -1
|
294
|
-
e1 = a1.shift while not e1.nil? and e1 < e2
|
295
|
-
when 1
|
296
|
-
e2 = a2.shift
|
297
|
-
e2 = a2.shift while not e2.nil? and e2 < e1
|
298
|
-
end
|
299
|
-
end
|
300
|
-
intersect
|
301
|
-
end
|
302
|
-
|
303
|
-
def self.merge_sorted_arrays(a1, a2)
|
304
|
-
e1, e2 = a1.shift, a2.shift
|
305
|
-
new = []
|
306
|
-
while true
|
307
|
-
case
|
308
|
-
when (e1 and e2)
|
309
|
-
case e1 <=> e2
|
310
|
-
when 0
|
311
|
-
new << e1
|
312
|
-
e1, e2 = a1.shift, a2.shift
|
313
|
-
when -1
|
314
|
-
new << e1
|
315
|
-
e1 = a1.shift
|
316
|
-
when 1
|
317
|
-
new << e2
|
318
|
-
e2 = a2.shift
|
319
|
-
end
|
320
|
-
when e2
|
321
|
-
new << e2
|
322
|
-
new.concat a2
|
323
|
-
break
|
324
|
-
when e1
|
325
|
-
new << e1
|
326
|
-
new.concat a1
|
327
|
-
break
|
328
|
-
else
|
329
|
-
break
|
330
|
-
end
|
331
|
-
end
|
332
|
-
new
|
333
|
-
end
|
334
|
-
|
335
|
-
def self.binary_include?(array, elem)
|
336
|
-
upper = array.size - 1
|
337
|
-
lower = 0
|
338
|
-
|
339
|
-
return -1 if upper < lower
|
340
|
-
|
341
|
-
while(upper >= lower) do
|
342
|
-
idx = lower + (upper - lower) / 2
|
343
|
-
value = array[idx]
|
344
|
-
|
345
|
-
case elem <=> value
|
346
|
-
when 0
|
347
|
-
return true
|
348
|
-
when -1
|
349
|
-
upper = idx - 1
|
350
|
-
when 1
|
351
|
-
lower = idx + 1
|
352
|
-
else
|
353
|
-
raise "Cannot compare #{[elem.inspect, value.inspect] * " with "}"
|
354
|
-
end
|
355
|
-
end
|
356
|
-
|
357
|
-
return false
|
358
|
-
end
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
def self.array2hash(array, default = nil)
|
363
|
-
hash = {}
|
364
|
-
array.each do |key, value|
|
365
|
-
value = default.dup if value.nil? and not default.nil?
|
366
|
-
hash[key] = value
|
367
|
-
end
|
368
|
-
hash
|
369
|
-
end
|
370
|
-
|
371
|
-
def self.zip2hash(list1, list2)
|
372
|
-
hash = {}
|
373
|
-
list1.each_with_index do |e,i|
|
374
|
-
hash[e] = list2[i]
|
375
|
-
end
|
376
|
-
hash
|
377
|
-
end
|
378
|
-
|
379
|
-
def self.process_to_hash(list)
|
380
|
-
result = yield list
|
381
|
-
zip2hash(list, result)
|
382
|
-
end
|
383
|
-
|
384
|
-
def self.env_add(var, value, sep = ":", prepend = true)
|
385
|
-
ENV[var] ||= ""
|
386
|
-
return if ENV[var] =~ /(#{sep}|^)#{Regexp.quote value}(#{sep}|$)/
|
387
|
-
if prepend
|
388
|
-
ENV[var] = value + sep + ENV[var]
|
389
|
-
else
|
390
|
-
ENV[var] += sep + ENV[var]
|
391
|
-
end
|
392
|
-
end
|
393
|
-
|
394
|
-
def self.do_once(&block)
|
395
|
-
return nil if $__did_once
|
396
|
-
$__did_once = true
|
397
|
-
yield
|
398
|
-
nil
|
399
|
-
end
|
400
|
-
|
401
|
-
def self.reset_do_once
|
402
|
-
$__did_once = false
|
403
|
-
end
|
404
|
-
|
405
|
-
def self.insist(times = 3, sleep = nil, msg = nil)
|
406
|
-
if Array === times
|
407
|
-
sleep_array = times
|
408
|
-
times = sleep_array.length
|
409
|
-
sleep = sleep_array.shift
|
410
|
-
end
|
411
|
-
try = 0
|
412
|
-
begin
|
413
|
-
yield
|
414
|
-
rescue
|
415
|
-
if msg
|
416
|
-
Log.warn("Insisting after exception: #{$!.message} -- #{msg}")
|
417
|
-
else
|
418
|
-
Log.warn("Insisting after exception: #{$!.message}")
|
419
|
-
end
|
420
|
-
if sleep and try > 0
|
421
|
-
sleep sleep
|
422
|
-
sleep = sleep_array.shift if sleep_array
|
423
|
-
else
|
424
|
-
Thread.pass
|
425
|
-
end
|
426
|
-
try += 1
|
427
|
-
retry if try < times
|
428
|
-
raise $!
|
429
|
-
end
|
430
|
-
end
|
431
|
-
|
432
|
-
def self.try3times(&block)
|
433
|
-
insist(3, &block)
|
434
|
-
end
|
435
|
-
|
436
|
-
def self.hash2string(hash)
|
437
|
-
hash.sort_by{|k,v| k.to_s}.collect{|k,v|
|
438
|
-
next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
|
439
|
-
[ Symbol === k ? ":" << k.to_s : k,
|
440
|
-
Symbol === v ? ":" << v.to_s : v] * "="
|
441
|
-
}.compact * "#"
|
442
|
-
end
|
443
|
-
|
444
|
-
def self.GET_params2hash(string)
|
445
|
-
hash = {}
|
446
|
-
string.split('&').collect{|item|
|
447
|
-
key, value = item.split("=").values_at 0, 1
|
448
|
-
hash[key] = value.nil? ? "" : CGI.unescape(value)
|
449
|
-
}
|
450
|
-
hash
|
451
|
-
end
|
452
|
-
|
453
|
-
def self.hash2GET_params(hash)
|
454
|
-
hash.sort_by{|k,v| k.to_s}.collect{|k,v|
|
455
|
-
next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object Array).include? v.class.to_s
|
456
|
-
v = case
|
457
|
-
when Symbol === v
|
458
|
-
v.to_s
|
459
|
-
when Array === v
|
460
|
-
v * ","
|
461
|
-
else
|
462
|
-
CGI.escape(v.to_s)
|
463
|
-
end
|
464
|
-
[ Symbol === k ? k.to_s : k, v] * "="
|
465
|
-
}.compact * "&"
|
466
|
-
end
|
467
|
-
|
468
|
-
def self.hash_to_html_tag_attributes(hash)
|
469
|
-
return "" if hash.nil? or hash.empty?
|
470
|
-
hash.collect{|k,v|
|
471
|
-
case
|
472
|
-
when (k.nil? or v.nil? or (String === v and v.empty?))
|
473
|
-
nil
|
474
|
-
when Array === v
|
475
|
-
[k,"'" << v * " " << "'"] * "="
|
476
|
-
when String === v
|
477
|
-
[k,"'" << v << "'"] * "="
|
478
|
-
when Symbol === v
|
479
|
-
[k,"'" << v.to_s << "'"] * "="
|
480
|
-
when TrueClass === v
|
481
|
-
[k,"'" << v.to_s << "'"] * "="
|
482
|
-
when (Fixnum === v or Float === v)
|
483
|
-
[k,"'" << v.to_s << "'"] * "="
|
484
|
-
else
|
485
|
-
nil
|
486
|
-
end
|
487
|
-
}.compact * " "
|
488
|
-
end
|
489
|
-
|
490
|
-
def self.html_tag(tag, content = nil, params = {})
|
491
|
-
attr_str = hash_to_html_tag_attributes(params)
|
492
|
-
attr_str = " " << attr_str if String === attr_str and attr_str != ""
|
493
|
-
html = if content.nil?
|
494
|
-
"<#{ tag }#{attr_str}/>"
|
495
|
-
else
|
496
|
-
"<#{ tag }#{attr_str}>#{ content }</#{ tag }>"
|
497
|
-
end
|
498
|
-
|
499
|
-
html
|
500
|
-
end
|
501
|
-
|
502
|
-
def self.path_relative_to(basedir, path)
|
503
|
-
path = File.expand_path(path) unless path[0] == "/"
|
504
|
-
basedir = File.expand_path(basedir) unless basedir[0] == "/"
|
505
|
-
|
506
|
-
if path.index(basedir) == 0
|
507
|
-
if basedir[-1] == "/"
|
508
|
-
return path[basedir.length..-1]
|
509
|
-
else
|
510
|
-
return path[basedir.length+1..-1]
|
511
|
-
end
|
512
|
-
else
|
513
|
-
return nil
|
514
|
-
end
|
515
|
-
end
|
516
|
-
|
517
|
-
def self.hostname
|
518
|
-
@hostanem ||= `hostname`.strip
|
519
|
-
end
|
520
|
-
|
521
|
-
|
522
|
-
def self.common_path(dir, file)
|
523
|
-
file = File.expand_path file
|
524
|
-
dir = File.expand_path dir
|
525
|
-
|
526
|
-
return true if file == dir
|
527
|
-
while File.dirname(file) != file
|
528
|
-
file = File.dirname(file)
|
529
|
-
return true if file == dir
|
530
|
-
end
|
531
|
-
|
532
|
-
return false
|
533
|
-
end
|
534
|
-
|
535
|
-
# WARN: probably not thread safe...
|
536
|
-
def self.in_dir(dir)
|
537
|
-
old_pwd = FileUtils.pwd
|
538
|
-
res = nil
|
539
|
-
begin
|
540
|
-
FileUtils.mkdir_p dir unless File.exists? dir
|
541
|
-
FileUtils.cd dir
|
542
|
-
res = yield
|
543
|
-
ensure
|
544
|
-
FileUtils.cd old_pwd
|
545
|
-
end
|
546
|
-
res
|
547
|
-
end
|
548
|
-
|
549
|
-
|
550
|
-
def self.add_defaults(options, defaults = {})
|
551
|
-
case
|
552
|
-
when Hash === options
|
553
|
-
new_options = options.dup
|
554
|
-
when String === options
|
555
|
-
new_options = string2hash options
|
556
|
-
else
|
557
|
-
raise "Format of '#{options.inspect}' not understood. It should be a hash"
|
558
|
-
end
|
559
|
-
|
560
|
-
defaults.each do |key, value|
|
561
|
-
next if options.include? key
|
562
|
-
|
563
|
-
new_options[key] = value
|
564
|
-
end
|
565
|
-
|
566
|
-
new_options
|
567
|
-
end
|
568
|
-
|
569
|
-
def self.process_options(hash, *keys)
|
570
|
-
if keys.length == 1
|
571
|
-
hash.include?(keys.first.to_sym) ? hash.delete(keys.first.to_sym) : hash.delete(keys.first.to_s)
|
572
|
-
else
|
573
|
-
keys.collect do |key| hash.include?(key.to_sym) ? hash.delete(key.to_sym) : hash.delete(key.to_s) end
|
574
|
-
end
|
575
|
-
end
|
576
|
-
|
577
|
-
def self.pull_keys(hash, prefix)
|
578
|
-
new = {}
|
579
|
-
hash.keys.each do |key|
|
580
|
-
if key.to_s =~ /#{ prefix }_(.*)/
|
581
|
-
case
|
582
|
-
when String === key
|
583
|
-
new[$1] = hash.delete key
|
584
|
-
when Symbol === key
|
585
|
-
new[$1.to_sym] = hash.delete key
|
586
|
-
end
|
587
|
-
else
|
588
|
-
if key.to_s == prefix.to_s
|
589
|
-
new[key] = hash.delete key
|
590
|
-
end
|
591
|
-
end
|
592
|
-
end
|
593
|
-
|
594
|
-
new
|
595
|
-
end
|
596
|
-
|
597
|
-
def self.string2const(string)
|
598
|
-
return nil if string.nil?
|
599
|
-
mod = Kernel
|
600
|
-
|
601
|
-
string.to_s.split('::').each do |str|
|
602
|
-
mod = mod.const_get str
|
603
|
-
end
|
604
|
-
|
605
|
-
mod
|
606
|
-
end
|
607
|
-
|
608
|
-
def self.string2hash_old(string)
|
609
|
-
|
610
|
-
options = {}
|
611
|
-
string.split(/#/).each do |str|
|
612
|
-
if str.match(/(.*)=(.*)/)
|
613
|
-
option, value = $1, $2
|
614
|
-
else
|
615
|
-
option, value = str, true
|
616
|
-
end
|
617
|
-
|
618
|
-
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
619
|
-
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
620
|
-
|
621
|
-
if value == true
|
622
|
-
options[option] = option.to_s.chars.first != '!'
|
623
|
-
else
|
624
|
-
options[option] = Thread.start do
|
625
|
-
$SAFE = 0;
|
626
|
-
case
|
627
|
-
when value =~ /^(?:true|T)$/i
|
628
|
-
true
|
629
|
-
when value =~ /^(?:false|F)$/i
|
630
|
-
false
|
631
|
-
when Symbol === value
|
632
|
-
value
|
633
|
-
when (String === value and value =~ /^\/(.*)\/$/)
|
634
|
-
Regexp.new /#{$1}/
|
635
|
-
else
|
636
|
-
begin
|
637
|
-
Kernel.const_get value
|
638
|
-
rescue
|
639
|
-
begin
|
640
|
-
raise if value =~ /[a-z]/ and defined? value
|
641
|
-
eval(value)
|
642
|
-
rescue Exception
|
643
|
-
value
|
644
|
-
end
|
645
|
-
end
|
646
|
-
end
|
647
|
-
end.value
|
648
|
-
end
|
649
|
-
end
|
650
|
-
|
651
|
-
options
|
652
|
-
end
|
653
|
-
|
654
|
-
def self.string2hash(string)
|
655
|
-
options = {}
|
656
|
-
|
657
|
-
string.split('#').each do |str|
|
658
|
-
key, sep, value = str.partition "="
|
659
|
-
|
660
|
-
key = key[1..-1].to_sym if key[0] == ":"
|
661
|
-
|
662
|
-
options[key] = true and next if value.empty?
|
663
|
-
options[key] = value[1..-1].to_sym and next if value[0] == ":"
|
664
|
-
options[key] = Regexp.new(/#{value[1..-2]}/) and next if value[0] == "/" and value[-1] == "/"
|
665
|
-
options[key] = value[1..-2] and next if value =~ /^['"].*['"]$/
|
666
|
-
options[key] = value.to_i and next if value =~ /^\d+$/
|
667
|
-
options[key] = value.to_f and next if value =~ /^\d*\.\d+$/
|
668
|
-
options[key] = true and next if value == "true"
|
669
|
-
options[key] = false and next if value == "false"
|
670
|
-
options[key] = value and next
|
671
|
-
|
672
|
-
options[key] = begin
|
673
|
-
saved_safe = $SAFE
|
674
|
-
$SAFE = 0
|
675
|
-
eval(value)
|
676
|
-
rescue Exception
|
677
|
-
value
|
678
|
-
ensure
|
679
|
-
$SAFE = saved_safe
|
680
|
-
end
|
681
|
-
end
|
682
|
-
|
683
|
-
return options
|
684
|
-
|
685
|
-
options = {}
|
686
|
-
string.split(/#/).each do |str|
|
687
|
-
if str.match(/(.*)=(.*)/)
|
688
|
-
option, value = $1, $2
|
689
|
-
else
|
690
|
-
option, value = str, true
|
691
|
-
end
|
692
|
-
|
693
|
-
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
694
|
-
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
695
|
-
|
696
|
-
if value == true
|
697
|
-
options[option] = option.to_s.chars.first != '!'
|
698
|
-
else
|
699
|
-
options[option] = Thread.start do
|
700
|
-
$SAFE = 0;
|
701
|
-
case
|
702
|
-
when value =~ /^(?:true|T)$/i
|
703
|
-
true
|
704
|
-
when value =~ /^(?:false|F)$/i
|
705
|
-
false
|
706
|
-
when Symbol === value
|
707
|
-
value
|
708
|
-
when (String === value and value =~ /^\/(.*)\/$/)
|
709
|
-
Regexp.new /#{$1}/
|
710
|
-
else
|
711
|
-
begin
|
712
|
-
Kernel.const_get value
|
713
|
-
rescue
|
714
|
-
begin
|
715
|
-
raise if value =~ /[a-z]/ and defined? value
|
716
|
-
eval(value)
|
717
|
-
rescue Exception
|
718
|
-
value
|
719
|
-
end
|
720
|
-
end
|
721
|
-
end
|
722
|
-
end.value
|
723
|
-
end
|
724
|
-
end
|
725
|
-
|
726
|
-
options
|
727
|
-
end
|
728
|
-
|
729
|
-
def self.field_position(fields, field, quiet = false)
|
730
|
-
return field if Integer === field or Range === field
|
731
|
-
raise FieldNotFoundError, "Field information missing" if fields.nil? && ! quiet
|
732
|
-
fields.each_with_index{|f,i| return i if f == field}
|
733
|
-
field_re = Regexp.new /^#{field}$/i
|
734
|
-
fields.each_with_index{|f,i| return i if f =~ field_re}
|
735
|
-
raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
|
736
|
-
end
|
737
|
-
|
738
|
-
# Divides the array into +num+ chunks of the same size by placing one
|
739
|
-
# element in each chunk iteratively.
|
740
|
-
def self.divide(array, num)
|
741
|
-
num = 1 if num == 0
|
742
|
-
chunks = []
|
743
|
-
num.to_i.times do chunks << [] end
|
744
|
-
array.each_with_index{|e, i|
|
745
|
-
c = i % num
|
746
|
-
chunks[c] << e
|
747
|
-
}
|
748
|
-
chunks
|
749
|
-
end
|
750
|
-
|
751
|
-
# Divides the array into chunks of +num+ same size by placing one
|
752
|
-
# element in each chunk iteratively.
|
753
|
-
def self.ordered_divide(array, num)
|
754
|
-
last = array.length - 1
|
755
|
-
chunks = []
|
756
|
-
current = 0
|
757
|
-
while current <= last
|
758
|
-
next_current = [last, current + num - 1].min
|
759
|
-
chunks << array[current..next_current]
|
760
|
-
current = next_current + 1
|
761
|
-
end
|
762
|
-
chunks
|
763
|
-
end
|
764
|
-
|
765
|
-
def self.append_zipped(current, new)
|
766
|
-
current.each do |v|
|
767
|
-
n = new.shift
|
768
|
-
if Array === n
|
769
|
-
v.concat new
|
770
|
-
else
|
771
|
-
v << n
|
772
|
-
end
|
773
|
-
end
|
774
|
-
current
|
775
|
-
end
|
776
|
-
|
777
|
-
def self.zip_fields(array)
|
778
|
-
return [] if array.empty? or (first = array.first).nil?
|
779
|
-
first.zip(*array[1..-1])
|
780
|
-
end
|
781
24
|
end
|
782
25
|
|
783
26
|
module PDF2Text
|