rbbt-util 5.11.9 → 5.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/persist.rb +1 -1
- data/lib/rbbt/tsv.rb +1 -0
- data/lib/rbbt/tsv/dumper.rb +1 -2
- data/lib/rbbt/tsv/parallel/traverse.rb +2 -2
- data/lib/rbbt/tsv/parser.rb +6 -2
- data/lib/rbbt/tsv/stream.rb +55 -0
- data/lib/rbbt/tsv/util.rb +7 -1
- data/lib/rbbt/util/misc.rb +5 -762
- data/lib/rbbt/util/misc/concurrent_stream.rb +15 -0
- data/lib/rbbt/util/misc/development.rb +122 -0
- data/lib/rbbt/util/misc/inspect.rb +3 -3
- data/lib/rbbt/util/misc/manipulation.rb +136 -0
- data/lib/rbbt/util/misc/math.rb +50 -0
- data/lib/rbbt/util/misc/objects.rb +79 -0
- data/lib/rbbt/util/misc/omics.rb +10 -0
- data/lib/rbbt/util/misc/options.rb +280 -0
- data/lib/rbbt/util/misc/pipes.rb +140 -20
- data/lib/rbbt/util/misc/system.rb +90 -0
- data/lib/rbbt/util/tar.rb +0 -7
- data/lib/rbbt/workflow/accessor.rb +3 -3
- data/lib/rbbt/workflow/step/run.rb +69 -15
- data/lib/rbbt/workflow/task.rb +7 -5
- data/test/rbbt/tsv/test_stream.rb +92 -0
- data/test/rbbt/tsv/test_util.rb +1 -3
- data/test/rbbt/util/misc/test_pipes.rb +79 -0
- data/test/rbbt/workflow/test_task.rb +1 -0
- metadata +10 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fea2c562a8e3611c8c9767589ec160a36c1b988d
|
4
|
+
data.tar.gz: 42c868b4354a14c64c9eb5ea45b4d447c3d1c757
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c85bd1f0ed345e6277daed12a41145134f81914e5d1b678db4676296dcb9a8e1ae1bc66c0b3dcc866dc9cab1ebcb80d50cb7181f4e71d755ecb721e3dffa9f55
|
7
|
+
data.tar.gz: fd38d3d2ef666f8b8b4eae6ce66b8ce45b8c5cb6a57e3bbc28c2900f239495a9f991080026e09dc7fbaca480518095e426f482afcc104d07f656835b80509bf2
|
data/lib/rbbt/persist.rb
CHANGED
data/lib/rbbt/tsv.rb
CHANGED
data/lib/rbbt/tsv/dumper.rb
CHANGED
@@ -69,8 +69,8 @@ module TSV
|
|
69
69
|
def self.traverse_io_array(io, options = {}, &block)
|
70
70
|
callback = Misc.process_options options, :callback
|
71
71
|
if callback
|
72
|
-
while
|
73
|
-
res = yield
|
72
|
+
while line = io.gets
|
73
|
+
res = yield line.strip
|
74
74
|
callback.call res
|
75
75
|
end
|
76
76
|
else
|
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -65,7 +65,7 @@ module TSV
|
|
65
65
|
end
|
66
66
|
|
67
67
|
def chop_line(line)
|
68
|
-
line.split(@sep, -1)
|
68
|
+
@sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
|
69
69
|
end
|
70
70
|
|
71
71
|
def get_values_single_from_flat(parts)
|
@@ -448,8 +448,12 @@ module TSV
|
|
448
448
|
def options
|
449
449
|
options = {}
|
450
450
|
TSV::ENTRIES.each do |entry|
|
451
|
-
|
451
|
+
if self.respond_to? entry
|
452
|
+
value = self.send(entry)
|
453
|
+
options[entry.to_sym] = value unless value.nil?
|
454
|
+
end
|
452
455
|
end
|
456
|
+
options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
|
453
457
|
IndiferentHash.setup options
|
454
458
|
end
|
455
459
|
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'rbbt/tsv/parser'
|
2
|
+
require 'rbbt/tsv/dumper'
|
3
|
+
module TSV
|
4
|
+
|
5
|
+
def self.collapse_stream(input, options = {})
|
6
|
+
options = Misc.add_defaults options, :sep => "\t"
|
7
|
+
input_stream = TSV.get_stream input
|
8
|
+
|
9
|
+
sorted_input_stream = Misc.sort_stream input_stream
|
10
|
+
|
11
|
+
parser = TSV::Parser.new sorted_input_stream, options.dup
|
12
|
+
dumper = TSV::Dumper.new parser
|
13
|
+
header = TSV.header_lines(parser.key_field, parser.fields, parser.options)
|
14
|
+
dumper.close_in
|
15
|
+
dumper.close_out
|
16
|
+
dumper.stream = Misc.collapse_stream parser.stream, parser.first_line, parser.sep, header
|
17
|
+
dumper
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.paste_streams(inputs, options = {})
|
21
|
+
options = Misc.add_defaults options, :sep => "\t", :sort => false
|
22
|
+
sort = Misc.process_options options, :sort
|
23
|
+
|
24
|
+
input_streams = []
|
25
|
+
input_lines = []
|
26
|
+
input_fields = []
|
27
|
+
input_key_fields = []
|
28
|
+
input_options = []
|
29
|
+
|
30
|
+
input_source_streams = inputs.collect do |input|
|
31
|
+
stream = TSV.get_stream input
|
32
|
+
stream = sort ? Misc.sort_stream(stream) : stream
|
33
|
+
end
|
34
|
+
|
35
|
+
input_source_streams.each do |stream|
|
36
|
+
parser = TSV::Parser.new stream, options
|
37
|
+
input_streams << parser.stream
|
38
|
+
input_lines << parser.first_line
|
39
|
+
input_fields << parser.fields
|
40
|
+
input_key_fields << parser.key_field
|
41
|
+
input_options << parser.options
|
42
|
+
end
|
43
|
+
|
44
|
+
key_field = input_key_fields.first
|
45
|
+
fields = input_fields.flatten
|
46
|
+
options = options.merge(input_options.first)
|
47
|
+
|
48
|
+
dumper = TSV::Dumper.new options.merge(:key_field => key_field, :fields => fields)
|
49
|
+
dumper.close_in
|
50
|
+
dumper.close_out
|
51
|
+
header = TSV.header_lines(key_field, fields, options)
|
52
|
+
dumper.stream = Misc.paste_streams input_streams, input_lines, options[:sep], header
|
53
|
+
dumper
|
54
|
+
end
|
55
|
+
end
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -61,6 +61,7 @@ module TSV
|
|
61
61
|
begin
|
62
62
|
TSV.open(CMD.cmd(cmd), :key_field => 1, :type => :single, :cast => :to_i)
|
63
63
|
rescue
|
64
|
+
Log.exception $!
|
64
65
|
TSV.setup({}, :type => :single, :cast => :to_i)
|
65
66
|
end
|
66
67
|
end
|
@@ -106,6 +107,11 @@ module TSV
|
|
106
107
|
when String
|
107
108
|
raise "Could not open file given by String: #{Misc.fingerprint file}" unless Open.remote?(file) or File.exists? file
|
108
109
|
Open.open(file, open_options)
|
110
|
+
when (defined? Step and Step)
|
111
|
+
stream = file.get_stream
|
112
|
+
stream || get_stream(file.join.path)
|
113
|
+
when TSV::Dumper
|
114
|
+
file.stream
|
109
115
|
else
|
110
116
|
raise "Cannot get stream from: #{file.inspect}"
|
111
117
|
end
|
@@ -134,7 +140,7 @@ module TSV
|
|
134
140
|
sep = (Hash === entry_hash and entry_hash[:sep]) ? entry_hash[:sep] : "\t"
|
135
141
|
|
136
142
|
str = ""
|
137
|
-
str << "#: " << Misc.hash2string(entry_hash) << "\n" if entry_hash and entry_hash.any?
|
143
|
+
str << "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if entry_hash and entry_hash.any?
|
138
144
|
if fields
|
139
145
|
str << "#" << key_field << sep << fields * sep << "\n"
|
140
146
|
end
|
data/lib/rbbt/util/misc.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
require 'lockfile'
|
2
|
-
require 'net/smtp'
|
3
2
|
require 'digest/md5'
|
4
3
|
require 'cgi'
|
5
4
|
require 'zlib'
|
6
5
|
require 'rubygems/package'
|
6
|
+
|
7
7
|
require 'rbbt/util/tar'
|
8
8
|
require 'rbbt/util/misc/exceptions'
|
9
9
|
require 'rbbt/util/misc/concurrent_stream'
|
@@ -15,769 +15,12 @@ require 'rbbt/util/misc/inspect'
|
|
15
15
|
require 'rbbt/util/misc/math'
|
16
16
|
require 'rbbt/util/misc/development'
|
17
17
|
require 'rbbt/util/misc/lock'
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
Misc.ordered_divide(keys, max).inject([]) do |acc,c|
|
23
|
-
new = self.values_at(*c)
|
24
|
-
new.annotate acc if new.respond_to? :annotate and acc.empty?
|
25
|
-
acc.concat(new)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
module LaterString
|
31
|
-
def to_s
|
32
|
-
yield
|
33
|
-
end
|
34
|
-
end
|
18
|
+
require 'rbbt/util/misc/options'
|
19
|
+
require 'rbbt/util/misc/system'
|
20
|
+
require 'rbbt/util/misc/objects'
|
21
|
+
require 'rbbt/util/misc/manipulation'
|
35
22
|
|
36
23
|
module Misc
|
37
|
-
|
38
|
-
def self.parse_cmd_params(str)
|
39
|
-
return str if Array === str
|
40
|
-
str.scan(/
|
41
|
-
(?:["']([^"']*?)["']) |
|
42
|
-
([^"'\s]+)
|
43
|
-
/x).flatten.compact
|
44
|
-
end
|
45
|
-
|
46
|
-
def self.pid_exists?(pid)
|
47
|
-
return false if pid.nil?
|
48
|
-
begin
|
49
|
-
Process.getpgid(pid.to_i)
|
50
|
-
true
|
51
|
-
rescue Errno::ESRCH
|
52
|
-
false
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def self.collapse_ranges(ranges)
|
57
|
-
processed = []
|
58
|
-
last = nil
|
59
|
-
final = []
|
60
|
-
ranges.sort_by{|range| range.begin }.each do |range|
|
61
|
-
rbegin = range.begin
|
62
|
-
rend = range.end
|
63
|
-
if last.nil? or rbegin > last
|
64
|
-
processed << [rbegin, rend]
|
65
|
-
last = rend
|
66
|
-
else
|
67
|
-
new_processed = []
|
68
|
-
processed.each do |pbegin,pend|
|
69
|
-
if pend < rbegin
|
70
|
-
final << [pbegin, pend]
|
71
|
-
else
|
72
|
-
eend = [rend, pend].max
|
73
|
-
new_processed << [pbegin, eend]
|
74
|
-
break
|
75
|
-
end
|
76
|
-
end
|
77
|
-
processed = new_processed
|
78
|
-
last = rend if rend > last
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
final.concat processed
|
83
|
-
final.collect{|b,e| (b..e)}
|
84
|
-
end
|
85
|
-
|
86
|
-
def self.total_length(ranges)
|
87
|
-
Misc.collapse_ranges(ranges).inject(0) do |total,range| total += range.end - range.begin + 1 end
|
88
|
-
end
|
89
|
-
|
90
|
-
def self.random_sample_in_range(total, size)
|
91
|
-
p = Set.new
|
92
|
-
|
93
|
-
if size > total / 10
|
94
|
-
template = (0..total - 1).to_a
|
95
|
-
size.times do |i|
|
96
|
-
pos = (rand * (total - i)).floor
|
97
|
-
if pos == template.length - 1
|
98
|
-
v = template.pop
|
99
|
-
else
|
100
|
-
v, n = template[pos], template[-1]
|
101
|
-
template.pop
|
102
|
-
template[pos] = n
|
103
|
-
end
|
104
|
-
p << v
|
105
|
-
end
|
106
|
-
else
|
107
|
-
size.times do
|
108
|
-
pos = nil
|
109
|
-
while pos.nil?
|
110
|
-
pos = (rand * total).floor
|
111
|
-
if p.include? pos
|
112
|
-
pos = nil
|
113
|
-
end
|
114
|
-
end
|
115
|
-
p << pos
|
116
|
-
end
|
117
|
-
end
|
118
|
-
p
|
119
|
-
end
|
120
|
-
|
121
|
-
def self.sample(ary, size, replacement = false)
|
122
|
-
if ary.respond_to? :sample
|
123
|
-
ary.sample size
|
124
|
-
else
|
125
|
-
total = ary.length
|
126
|
-
p = random_sample_in_range(total, size)
|
127
|
-
ary.values_at *p
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
|
132
|
-
def self.prepare_entity(entity, field, options = {})
|
133
|
-
return entity unless defined? Entity
|
134
|
-
return entity unless String === entity or Array === entity
|
135
|
-
options ||= {}
|
136
|
-
|
137
|
-
dup_array = options.delete :dup_array
|
138
|
-
|
139
|
-
if Annotated === field or Entity.respond_to?(:formats) and Entity.formats.include? field
|
140
|
-
params = options.dup
|
141
|
-
|
142
|
-
params[:format] ||= params.delete "format"
|
143
|
-
params.merge!(:format => field) unless params.include?(:format) and not ((f = params[:format]).nil? or (String === f and f.empty?))
|
144
|
-
|
145
|
-
mod = Entity === field ? field : Entity.formats[field]
|
146
|
-
entity = mod.setup(
|
147
|
-
((entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity) ),
|
148
|
-
params
|
149
|
-
)
|
150
|
-
end
|
151
|
-
|
152
|
-
entity
|
153
|
-
end
|
154
|
-
|
155
|
-
def self.ensembl_server(organism)
|
156
|
-
date = organism.split("/")[1]
|
157
|
-
if date.nil?
|
158
|
-
"www.ensembl.org"
|
159
|
-
else
|
160
|
-
"#{ date }.archive.ensembl.org"
|
161
|
-
end
|
162
|
-
end
|
163
|
-
|
164
|
-
|
165
|
-
def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
|
166
|
-
name1 ||= "list 1"
|
167
|
-
name2 ||= "list 2"
|
168
|
-
name3 ||= "list 3"
|
169
|
-
|
170
|
-
sizes = [list1, list2, list3, list1 & list2, list1 & list3, list2 & list3, list1 & list2 & list3].collect{|l| l.length}
|
171
|
-
|
172
|
-
total = total.length if Array === total
|
173
|
-
|
174
|
-
label = "#{name1}: #{sizes[0]} (#{name2}: #{sizes[3]}, #{name3}: #{sizes[4]})"
|
175
|
-
label << "|#{name2}: #{sizes[1]} (#{name1}: #{sizes[3]}, #{name3}: #{sizes[5]})"
|
176
|
-
label << "|#{name3}: #{sizes[2]} (#{name1}: #{sizes[4]}, #{name2}: #{sizes[5]})"
|
177
|
-
if total
|
178
|
-
label << "| INTERSECTION: #{sizes[6]} TOTAL: #{total}"
|
179
|
-
else
|
180
|
-
label << "| INTERSECTION: #{sizes[6]}"
|
181
|
-
end
|
182
|
-
|
183
|
-
max = total || sizes.max
|
184
|
-
sizes = sizes.collect{|v| (v.to_f/max * 100).to_i.to_f / 100}
|
185
|
-
url = "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{sizes * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
|
186
|
-
end
|
187
|
-
|
188
|
-
def self.consolidate(list)
|
189
|
-
list.inject(nil){|acc,e|
|
190
|
-
if acc.nil?
|
191
|
-
acc = e
|
192
|
-
else
|
193
|
-
acc.concat e
|
194
|
-
acc
|
195
|
-
end
|
196
|
-
}
|
197
|
-
end
|
198
|
-
|
199
|
-
def self.positional2hash(keys, *values)
|
200
|
-
if Hash === values.last
|
201
|
-
extra = values.pop
|
202
|
-
inputs = Misc.zip2hash(keys, values)
|
203
|
-
inputs.delete_if{|k,v| v.nil? or (String === v and v.empty?)}
|
204
|
-
inputs = Misc.add_defaults inputs, extra
|
205
|
-
inputs.delete_if{|k,v| not keys.include?(k) and not (Symbol === k ? keys.include?(k.to_s) : keys.include?(k.to_sym))}
|
206
|
-
inputs
|
207
|
-
else
|
208
|
-
Misc.zip2hash(keys, values)
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
def self.send_email(from, to, subject, message, options = {})
|
213
|
-
IndiferentHash.setup(options)
|
214
|
-
options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login
|
215
|
-
|
216
|
-
server, port, user, pass, from_alias, to_alias, auth = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth
|
217
|
-
|
218
|
-
msg = <<-END_OF_MESSAGE
|
219
|
-
From: #{from_alias} <#{from}>
|
220
|
-
To: #{to_alias} <#{to}>
|
221
|
-
Subject: #{subject}
|
222
|
-
|
223
|
-
#{message}
|
224
|
-
END_OF_MESSAGE
|
225
|
-
|
226
|
-
Net::SMTP.start(server, port, server, user, pass, auth) do |smtp|
|
227
|
-
smtp.send_message msg, from, to
|
228
|
-
end
|
229
|
-
end
|
230
|
-
|
231
|
-
def self.counts(array)
|
232
|
-
counts = {}
|
233
|
-
array.each do |e|
|
234
|
-
counts[e] ||= 0
|
235
|
-
counts[e] += 1
|
236
|
-
end
|
237
|
-
|
238
|
-
counts
|
239
|
-
end
|
240
|
-
|
241
|
-
def self.proportions(array)
|
242
|
-
total = array.length
|
243
|
-
|
244
|
-
proportions = Hash.new 0
|
245
|
-
|
246
|
-
array.each do |e|
|
247
|
-
proportions[e] += 1.0 / total
|
248
|
-
end
|
249
|
-
|
250
|
-
class << proportions; self;end.class_eval do
|
251
|
-
def to_s
|
252
|
-
sort{|a,b| a[1] == b[1] ? a[0] <=> b[0] : a[1] <=> b[1]}.collect{|k,c| "%3d\t%s" % [c, k]} * "\n"
|
253
|
-
end
|
254
|
-
end
|
255
|
-
|
256
|
-
proportions
|
257
|
-
end
|
258
|
-
|
259
|
-
|
260
|
-
def self.sorted_array_hits(a1, a2)
|
261
|
-
e1, e2 = a1.shift, a2.shift
|
262
|
-
counter = 0
|
263
|
-
match = []
|
264
|
-
while true
|
265
|
-
break if e1.nil? or e2.nil?
|
266
|
-
case e1 <=> e2
|
267
|
-
when 0
|
268
|
-
match << counter
|
269
|
-
e1, e2 = a1.shift, a2.shift
|
270
|
-
counter += 1
|
271
|
-
when -1
|
272
|
-
while not e1.nil? and e1 < e2
|
273
|
-
e1 = a1.shift
|
274
|
-
counter += 1
|
275
|
-
end
|
276
|
-
when 1
|
277
|
-
e2 = a2.shift
|
278
|
-
e2 = a2.shift while not e2.nil? and e2 < e1
|
279
|
-
end
|
280
|
-
end
|
281
|
-
match
|
282
|
-
end
|
283
|
-
|
284
|
-
def self.intersect_sorted_arrays(a1, a2)
|
285
|
-
e1, e2 = a1.shift, a2.shift
|
286
|
-
intersect = []
|
287
|
-
while true
|
288
|
-
break if e1.nil? or e2.nil?
|
289
|
-
case e1 <=> e2
|
290
|
-
when 0
|
291
|
-
intersect << e1
|
292
|
-
e1, e2 = a1.shift, a2.shift
|
293
|
-
when -1
|
294
|
-
e1 = a1.shift while not e1.nil? and e1 < e2
|
295
|
-
when 1
|
296
|
-
e2 = a2.shift
|
297
|
-
e2 = a2.shift while not e2.nil? and e2 < e1
|
298
|
-
end
|
299
|
-
end
|
300
|
-
intersect
|
301
|
-
end
|
302
|
-
|
303
|
-
def self.merge_sorted_arrays(a1, a2)
|
304
|
-
e1, e2 = a1.shift, a2.shift
|
305
|
-
new = []
|
306
|
-
while true
|
307
|
-
case
|
308
|
-
when (e1 and e2)
|
309
|
-
case e1 <=> e2
|
310
|
-
when 0
|
311
|
-
new << e1
|
312
|
-
e1, e2 = a1.shift, a2.shift
|
313
|
-
when -1
|
314
|
-
new << e1
|
315
|
-
e1 = a1.shift
|
316
|
-
when 1
|
317
|
-
new << e2
|
318
|
-
e2 = a2.shift
|
319
|
-
end
|
320
|
-
when e2
|
321
|
-
new << e2
|
322
|
-
new.concat a2
|
323
|
-
break
|
324
|
-
when e1
|
325
|
-
new << e1
|
326
|
-
new.concat a1
|
327
|
-
break
|
328
|
-
else
|
329
|
-
break
|
330
|
-
end
|
331
|
-
end
|
332
|
-
new
|
333
|
-
end
|
334
|
-
|
335
|
-
def self.binary_include?(array, elem)
|
336
|
-
upper = array.size - 1
|
337
|
-
lower = 0
|
338
|
-
|
339
|
-
return -1 if upper < lower
|
340
|
-
|
341
|
-
while(upper >= lower) do
|
342
|
-
idx = lower + (upper - lower) / 2
|
343
|
-
value = array[idx]
|
344
|
-
|
345
|
-
case elem <=> value
|
346
|
-
when 0
|
347
|
-
return true
|
348
|
-
when -1
|
349
|
-
upper = idx - 1
|
350
|
-
when 1
|
351
|
-
lower = idx + 1
|
352
|
-
else
|
353
|
-
raise "Cannot compare #{[elem.inspect, value.inspect] * " with "}"
|
354
|
-
end
|
355
|
-
end
|
356
|
-
|
357
|
-
return false
|
358
|
-
end
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
def self.array2hash(array, default = nil)
|
363
|
-
hash = {}
|
364
|
-
array.each do |key, value|
|
365
|
-
value = default.dup if value.nil? and not default.nil?
|
366
|
-
hash[key] = value
|
367
|
-
end
|
368
|
-
hash
|
369
|
-
end
|
370
|
-
|
371
|
-
def self.zip2hash(list1, list2)
|
372
|
-
hash = {}
|
373
|
-
list1.each_with_index do |e,i|
|
374
|
-
hash[e] = list2[i]
|
375
|
-
end
|
376
|
-
hash
|
377
|
-
end
|
378
|
-
|
379
|
-
def self.process_to_hash(list)
|
380
|
-
result = yield list
|
381
|
-
zip2hash(list, result)
|
382
|
-
end
|
383
|
-
|
384
|
-
def self.env_add(var, value, sep = ":", prepend = true)
|
385
|
-
ENV[var] ||= ""
|
386
|
-
return if ENV[var] =~ /(#{sep}|^)#{Regexp.quote value}(#{sep}|$)/
|
387
|
-
if prepend
|
388
|
-
ENV[var] = value + sep + ENV[var]
|
389
|
-
else
|
390
|
-
ENV[var] += sep + ENV[var]
|
391
|
-
end
|
392
|
-
end
|
393
|
-
|
394
|
-
def self.do_once(&block)
|
395
|
-
return nil if $__did_once
|
396
|
-
$__did_once = true
|
397
|
-
yield
|
398
|
-
nil
|
399
|
-
end
|
400
|
-
|
401
|
-
def self.reset_do_once
|
402
|
-
$__did_once = false
|
403
|
-
end
|
404
|
-
|
405
|
-
def self.insist(times = 3, sleep = nil, msg = nil)
|
406
|
-
if Array === times
|
407
|
-
sleep_array = times
|
408
|
-
times = sleep_array.length
|
409
|
-
sleep = sleep_array.shift
|
410
|
-
end
|
411
|
-
try = 0
|
412
|
-
begin
|
413
|
-
yield
|
414
|
-
rescue
|
415
|
-
if msg
|
416
|
-
Log.warn("Insisting after exception: #{$!.message} -- #{msg}")
|
417
|
-
else
|
418
|
-
Log.warn("Insisting after exception: #{$!.message}")
|
419
|
-
end
|
420
|
-
if sleep and try > 0
|
421
|
-
sleep sleep
|
422
|
-
sleep = sleep_array.shift if sleep_array
|
423
|
-
else
|
424
|
-
Thread.pass
|
425
|
-
end
|
426
|
-
try += 1
|
427
|
-
retry if try < times
|
428
|
-
raise $!
|
429
|
-
end
|
430
|
-
end
|
431
|
-
|
432
|
-
def self.try3times(&block)
|
433
|
-
insist(3, &block)
|
434
|
-
end
|
435
|
-
|
436
|
-
def self.hash2string(hash)
|
437
|
-
hash.sort_by{|k,v| k.to_s}.collect{|k,v|
|
438
|
-
next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
|
439
|
-
[ Symbol === k ? ":" << k.to_s : k,
|
440
|
-
Symbol === v ? ":" << v.to_s : v] * "="
|
441
|
-
}.compact * "#"
|
442
|
-
end
|
443
|
-
|
444
|
-
def self.GET_params2hash(string)
|
445
|
-
hash = {}
|
446
|
-
string.split('&').collect{|item|
|
447
|
-
key, value = item.split("=").values_at 0, 1
|
448
|
-
hash[key] = value.nil? ? "" : CGI.unescape(value)
|
449
|
-
}
|
450
|
-
hash
|
451
|
-
end
|
452
|
-
|
453
|
-
def self.hash2GET_params(hash)
|
454
|
-
hash.sort_by{|k,v| k.to_s}.collect{|k,v|
|
455
|
-
next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object Array).include? v.class.to_s
|
456
|
-
v = case
|
457
|
-
when Symbol === v
|
458
|
-
v.to_s
|
459
|
-
when Array === v
|
460
|
-
v * ","
|
461
|
-
else
|
462
|
-
CGI.escape(v.to_s)
|
463
|
-
end
|
464
|
-
[ Symbol === k ? k.to_s : k, v] * "="
|
465
|
-
}.compact * "&"
|
466
|
-
end
|
467
|
-
|
468
|
-
def self.hash_to_html_tag_attributes(hash)
|
469
|
-
return "" if hash.nil? or hash.empty?
|
470
|
-
hash.collect{|k,v|
|
471
|
-
case
|
472
|
-
when (k.nil? or v.nil? or (String === v and v.empty?))
|
473
|
-
nil
|
474
|
-
when Array === v
|
475
|
-
[k,"'" << v * " " << "'"] * "="
|
476
|
-
when String === v
|
477
|
-
[k,"'" << v << "'"] * "="
|
478
|
-
when Symbol === v
|
479
|
-
[k,"'" << v.to_s << "'"] * "="
|
480
|
-
when TrueClass === v
|
481
|
-
[k,"'" << v.to_s << "'"] * "="
|
482
|
-
when (Fixnum === v or Float === v)
|
483
|
-
[k,"'" << v.to_s << "'"] * "="
|
484
|
-
else
|
485
|
-
nil
|
486
|
-
end
|
487
|
-
}.compact * " "
|
488
|
-
end
|
489
|
-
|
490
|
-
def self.html_tag(tag, content = nil, params = {})
|
491
|
-
attr_str = hash_to_html_tag_attributes(params)
|
492
|
-
attr_str = " " << attr_str if String === attr_str and attr_str != ""
|
493
|
-
html = if content.nil?
|
494
|
-
"<#{ tag }#{attr_str}/>"
|
495
|
-
else
|
496
|
-
"<#{ tag }#{attr_str}>#{ content }</#{ tag }>"
|
497
|
-
end
|
498
|
-
|
499
|
-
html
|
500
|
-
end
|
501
|
-
|
502
|
-
def self.path_relative_to(basedir, path)
|
503
|
-
path = File.expand_path(path) unless path[0] == "/"
|
504
|
-
basedir = File.expand_path(basedir) unless basedir[0] == "/"
|
505
|
-
|
506
|
-
if path.index(basedir) == 0
|
507
|
-
if basedir[-1] == "/"
|
508
|
-
return path[basedir.length..-1]
|
509
|
-
else
|
510
|
-
return path[basedir.length+1..-1]
|
511
|
-
end
|
512
|
-
else
|
513
|
-
return nil
|
514
|
-
end
|
515
|
-
end
|
516
|
-
|
517
|
-
def self.hostname
|
518
|
-
@hostanem ||= `hostname`.strip
|
519
|
-
end
|
520
|
-
|
521
|
-
|
522
|
-
def self.common_path(dir, file)
|
523
|
-
file = File.expand_path file
|
524
|
-
dir = File.expand_path dir
|
525
|
-
|
526
|
-
return true if file == dir
|
527
|
-
while File.dirname(file) != file
|
528
|
-
file = File.dirname(file)
|
529
|
-
return true if file == dir
|
530
|
-
end
|
531
|
-
|
532
|
-
return false
|
533
|
-
end
|
534
|
-
|
535
|
-
# WARN: probably not thread safe...
|
536
|
-
def self.in_dir(dir)
|
537
|
-
old_pwd = FileUtils.pwd
|
538
|
-
res = nil
|
539
|
-
begin
|
540
|
-
FileUtils.mkdir_p dir unless File.exists? dir
|
541
|
-
FileUtils.cd dir
|
542
|
-
res = yield
|
543
|
-
ensure
|
544
|
-
FileUtils.cd old_pwd
|
545
|
-
end
|
546
|
-
res
|
547
|
-
end
|
548
|
-
|
549
|
-
|
550
|
-
def self.add_defaults(options, defaults = {})
|
551
|
-
case
|
552
|
-
when Hash === options
|
553
|
-
new_options = options.dup
|
554
|
-
when String === options
|
555
|
-
new_options = string2hash options
|
556
|
-
else
|
557
|
-
raise "Format of '#{options.inspect}' not understood. It should be a hash"
|
558
|
-
end
|
559
|
-
|
560
|
-
defaults.each do |key, value|
|
561
|
-
next if options.include? key
|
562
|
-
|
563
|
-
new_options[key] = value
|
564
|
-
end
|
565
|
-
|
566
|
-
new_options
|
567
|
-
end
|
568
|
-
|
569
|
-
def self.process_options(hash, *keys)
|
570
|
-
if keys.length == 1
|
571
|
-
hash.include?(keys.first.to_sym) ? hash.delete(keys.first.to_sym) : hash.delete(keys.first.to_s)
|
572
|
-
else
|
573
|
-
keys.collect do |key| hash.include?(key.to_sym) ? hash.delete(key.to_sym) : hash.delete(key.to_s) end
|
574
|
-
end
|
575
|
-
end
|
576
|
-
|
577
|
-
def self.pull_keys(hash, prefix)
|
578
|
-
new = {}
|
579
|
-
hash.keys.each do |key|
|
580
|
-
if key.to_s =~ /#{ prefix }_(.*)/
|
581
|
-
case
|
582
|
-
when String === key
|
583
|
-
new[$1] = hash.delete key
|
584
|
-
when Symbol === key
|
585
|
-
new[$1.to_sym] = hash.delete key
|
586
|
-
end
|
587
|
-
else
|
588
|
-
if key.to_s == prefix.to_s
|
589
|
-
new[key] = hash.delete key
|
590
|
-
end
|
591
|
-
end
|
592
|
-
end
|
593
|
-
|
594
|
-
new
|
595
|
-
end
|
596
|
-
|
597
|
-
def self.string2const(string)
|
598
|
-
return nil if string.nil?
|
599
|
-
mod = Kernel
|
600
|
-
|
601
|
-
string.to_s.split('::').each do |str|
|
602
|
-
mod = mod.const_get str
|
603
|
-
end
|
604
|
-
|
605
|
-
mod
|
606
|
-
end
|
607
|
-
|
608
|
-
def self.string2hash_old(string)
|
609
|
-
|
610
|
-
options = {}
|
611
|
-
string.split(/#/).each do |str|
|
612
|
-
if str.match(/(.*)=(.*)/)
|
613
|
-
option, value = $1, $2
|
614
|
-
else
|
615
|
-
option, value = str, true
|
616
|
-
end
|
617
|
-
|
618
|
-
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
619
|
-
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
620
|
-
|
621
|
-
if value == true
|
622
|
-
options[option] = option.to_s.chars.first != '!'
|
623
|
-
else
|
624
|
-
options[option] = Thread.start do
|
625
|
-
$SAFE = 0;
|
626
|
-
case
|
627
|
-
when value =~ /^(?:true|T)$/i
|
628
|
-
true
|
629
|
-
when value =~ /^(?:false|F)$/i
|
630
|
-
false
|
631
|
-
when Symbol === value
|
632
|
-
value
|
633
|
-
when (String === value and value =~ /^\/(.*)\/$/)
|
634
|
-
Regexp.new /#{$1}/
|
635
|
-
else
|
636
|
-
begin
|
637
|
-
Kernel.const_get value
|
638
|
-
rescue
|
639
|
-
begin
|
640
|
-
raise if value =~ /[a-z]/ and defined? value
|
641
|
-
eval(value)
|
642
|
-
rescue Exception
|
643
|
-
value
|
644
|
-
end
|
645
|
-
end
|
646
|
-
end
|
647
|
-
end.value
|
648
|
-
end
|
649
|
-
end
|
650
|
-
|
651
|
-
options
|
652
|
-
end
|
653
|
-
|
654
|
-
def self.string2hash(string)
|
655
|
-
options = {}
|
656
|
-
|
657
|
-
string.split('#').each do |str|
|
658
|
-
key, sep, value = str.partition "="
|
659
|
-
|
660
|
-
key = key[1..-1].to_sym if key[0] == ":"
|
661
|
-
|
662
|
-
options[key] = true and next if value.empty?
|
663
|
-
options[key] = value[1..-1].to_sym and next if value[0] == ":"
|
664
|
-
options[key] = Regexp.new(/#{value[1..-2]}/) and next if value[0] == "/" and value[-1] == "/"
|
665
|
-
options[key] = value[1..-2] and next if value =~ /^['"].*['"]$/
|
666
|
-
options[key] = value.to_i and next if value =~ /^\d+$/
|
667
|
-
options[key] = value.to_f and next if value =~ /^\d*\.\d+$/
|
668
|
-
options[key] = true and next if value == "true"
|
669
|
-
options[key] = false and next if value == "false"
|
670
|
-
options[key] = value and next
|
671
|
-
|
672
|
-
options[key] = begin
|
673
|
-
saved_safe = $SAFE
|
674
|
-
$SAFE = 0
|
675
|
-
eval(value)
|
676
|
-
rescue Exception
|
677
|
-
value
|
678
|
-
ensure
|
679
|
-
$SAFE = saved_safe
|
680
|
-
end
|
681
|
-
end
|
682
|
-
|
683
|
-
return options
|
684
|
-
|
685
|
-
options = {}
|
686
|
-
string.split(/#/).each do |str|
|
687
|
-
if str.match(/(.*)=(.*)/)
|
688
|
-
option, value = $1, $2
|
689
|
-
else
|
690
|
-
option, value = str, true
|
691
|
-
end
|
692
|
-
|
693
|
-
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
694
|
-
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
695
|
-
|
696
|
-
if value == true
|
697
|
-
options[option] = option.to_s.chars.first != '!'
|
698
|
-
else
|
699
|
-
options[option] = Thread.start do
|
700
|
-
$SAFE = 0;
|
701
|
-
case
|
702
|
-
when value =~ /^(?:true|T)$/i
|
703
|
-
true
|
704
|
-
when value =~ /^(?:false|F)$/i
|
705
|
-
false
|
706
|
-
when Symbol === value
|
707
|
-
value
|
708
|
-
when (String === value and value =~ /^\/(.*)\/$/)
|
709
|
-
Regexp.new /#{$1}/
|
710
|
-
else
|
711
|
-
begin
|
712
|
-
Kernel.const_get value
|
713
|
-
rescue
|
714
|
-
begin
|
715
|
-
raise if value =~ /[a-z]/ and defined? value
|
716
|
-
eval(value)
|
717
|
-
rescue Exception
|
718
|
-
value
|
719
|
-
end
|
720
|
-
end
|
721
|
-
end
|
722
|
-
end.value
|
723
|
-
end
|
724
|
-
end
|
725
|
-
|
726
|
-
options
|
727
|
-
end
|
728
|
-
|
729
|
-
def self.field_position(fields, field, quiet = false)
|
730
|
-
return field if Integer === field or Range === field
|
731
|
-
raise FieldNotFoundError, "Field information missing" if fields.nil? && ! quiet
|
732
|
-
fields.each_with_index{|f,i| return i if f == field}
|
733
|
-
field_re = Regexp.new /^#{field}$/i
|
734
|
-
fields.each_with_index{|f,i| return i if f =~ field_re}
|
735
|
-
raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
|
736
|
-
end
|
737
|
-
|
738
|
-
# Divides the array into +num+ chunks of the same size by placing one
|
739
|
-
# element in each chunk iteratively.
|
740
|
-
def self.divide(array, num)
|
741
|
-
num = 1 if num == 0
|
742
|
-
chunks = []
|
743
|
-
num.to_i.times do chunks << [] end
|
744
|
-
array.each_with_index{|e, i|
|
745
|
-
c = i % num
|
746
|
-
chunks[c] << e
|
747
|
-
}
|
748
|
-
chunks
|
749
|
-
end
|
750
|
-
|
751
|
-
# Divides the array into chunks of +num+ same size by placing one
|
752
|
-
# element in each chunk iteratively.
|
753
|
-
def self.ordered_divide(array, num)
|
754
|
-
last = array.length - 1
|
755
|
-
chunks = []
|
756
|
-
current = 0
|
757
|
-
while current <= last
|
758
|
-
next_current = [last, current + num - 1].min
|
759
|
-
chunks << array[current..next_current]
|
760
|
-
current = next_current + 1
|
761
|
-
end
|
762
|
-
chunks
|
763
|
-
end
|
764
|
-
|
765
|
-
def self.append_zipped(current, new)
|
766
|
-
current.each do |v|
|
767
|
-
n = new.shift
|
768
|
-
if Array === n
|
769
|
-
v.concat new
|
770
|
-
else
|
771
|
-
v << n
|
772
|
-
end
|
773
|
-
end
|
774
|
-
current
|
775
|
-
end
|
776
|
-
|
777
|
-
def self.zip_fields(array)
|
778
|
-
return [] if array.empty? or (first = array.first).nil?
|
779
|
-
first.zip(*array[1..-1])
|
780
|
-
end
|
781
24
|
end
|
782
25
|
|
783
26
|
module PDF2Text
|