rbbt-util 5.11.9 → 5.12.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2dfad08e355d768565decb173f5a86198c3600e2
4
- data.tar.gz: 64421ddf30a3066b1313c1762a9c1b41cb9a719b
3
+ metadata.gz: fea2c562a8e3611c8c9767589ec160a36c1b988d
4
+ data.tar.gz: 42c868b4354a14c64c9eb5ea45b4d447c3d1c757
5
5
  SHA512:
6
- metadata.gz: f415797c4922dea7a9f3ecc8ff636cd3681da6f22b0111c34e1210e9beed4430c8c17c4787e9fe5ad4ad2e63d014be2308a533d220f87e2f40224b9d91daadaa
7
- data.tar.gz: 8b7ad6d7440ece8580bab37f20c6e7e86158953a73dce168816b26e6003cb9966dd40359a798a0eb4abc152393c86c922f93ce3d2a27c272a47ebbbba5907d35
6
+ metadata.gz: c85bd1f0ed345e6277daed12a41145134f81914e5d1b678db4676296dcb9a8e1ae1bc66c0b3dcc866dc9cab1ebcb80d50cb7181f4e71d755ecb721e3dffa9f55
7
+ data.tar.gz: fd38d3d2ef666f8b8b4eae6ce66b8ce45b8c5cb6a57e3bbc28c2900f239495a9f991080026e09dc7fbaca480518095e426f482afcc104d07f656835b80509bf2
data/lib/rbbt/persist.rb CHANGED
@@ -465,7 +465,7 @@ module Persist
465
465
  res.abort if res.respond_to? :abort
466
466
  raise $!
467
467
  end
468
- when TSV::Dumper
468
+ when (defined? TSV and TSV::Dumper)
469
469
  begin
470
470
  io = res.stream
471
471
  res = TSV.open(io)
data/lib/rbbt/tsv.rb CHANGED
@@ -15,6 +15,7 @@ require 'rbbt/tsv/attach'
15
15
  require 'rbbt/tsv/filter'
16
16
  require 'rbbt/tsv/field_index'
17
17
  require 'rbbt/tsv/parallel'
18
+ require 'rbbt/tsv/stream'
18
19
 
19
20
  module TSV
20
21
  class << self
@@ -64,8 +64,7 @@ module TSV
64
64
  end
65
65
 
66
66
  def close_out
67
- Log.debug "Close out #{@stream.inspect}"
68
- @stream.close
67
+ @stream.close unless @stream.closed?
69
68
  end
70
69
 
71
70
  def close_in
@@ -69,8 +69,8 @@ module TSV
69
69
  def self.traverse_io_array(io, options = {}, &block)
70
70
  callback = Misc.process_options options, :callback
71
71
  if callback
72
- while not io.eof?
73
- res = yield io.gets.strip
72
+ while line = io.gets
73
+ res = yield line.strip
74
74
  callback.call res
75
75
  end
76
76
  else
@@ -65,7 +65,7 @@ module TSV
65
65
  end
66
66
 
67
67
  def chop_line(line)
68
- line.split(@sep, -1)
68
+ @sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
69
69
  end
70
70
 
71
71
  def get_values_single_from_flat(parts)
@@ -448,8 +448,12 @@ module TSV
448
448
  def options
449
449
  options = {}
450
450
  TSV::ENTRIES.each do |entry|
451
- options[entry.to_sym] = self.send(entry) if self.respond_to? entry
451
+ if self.respond_to? entry
452
+ value = self.send(entry)
453
+ options[entry.to_sym] = value unless value.nil?
454
+ end
452
455
  end
456
+ options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
453
457
  IndiferentHash.setup options
454
458
  end
455
459
 
@@ -0,0 +1,55 @@
1
+ require 'rbbt/tsv/parser'
2
+ require 'rbbt/tsv/dumper'
3
+ module TSV
4
+
5
+ def self.collapse_stream(input, options = {})
6
+ options = Misc.add_defaults options, :sep => "\t"
7
+ input_stream = TSV.get_stream input
8
+
9
+ sorted_input_stream = Misc.sort_stream input_stream
10
+
11
+ parser = TSV::Parser.new sorted_input_stream, options.dup
12
+ dumper = TSV::Dumper.new parser
13
+ header = TSV.header_lines(parser.key_field, parser.fields, parser.options)
14
+ dumper.close_in
15
+ dumper.close_out
16
+ dumper.stream = Misc.collapse_stream parser.stream, parser.first_line, parser.sep, header
17
+ dumper
18
+ end
19
+
20
+ def self.paste_streams(inputs, options = {})
21
+ options = Misc.add_defaults options, :sep => "\t", :sort => false
22
+ sort = Misc.process_options options, :sort
23
+
24
+ input_streams = []
25
+ input_lines = []
26
+ input_fields = []
27
+ input_key_fields = []
28
+ input_options = []
29
+
30
+ input_source_streams = inputs.collect do |input|
31
+ stream = TSV.get_stream input
32
+ stream = sort ? Misc.sort_stream(stream) : stream
33
+ end
34
+
35
+ input_source_streams.each do |stream|
36
+ parser = TSV::Parser.new stream, options
37
+ input_streams << parser.stream
38
+ input_lines << parser.first_line
39
+ input_fields << parser.fields
40
+ input_key_fields << parser.key_field
41
+ input_options << parser.options
42
+ end
43
+
44
+ key_field = input_key_fields.first
45
+ fields = input_fields.flatten
46
+ options = options.merge(input_options.first)
47
+
48
+ dumper = TSV::Dumper.new options.merge(:key_field => key_field, :fields => fields)
49
+ dumper.close_in
50
+ dumper.close_out
51
+ header = TSV.header_lines(key_field, fields, options)
52
+ dumper.stream = Misc.paste_streams input_streams, input_lines, options[:sep], header
53
+ dumper
54
+ end
55
+ end
data/lib/rbbt/tsv/util.rb CHANGED
@@ -61,6 +61,7 @@ module TSV
61
61
  begin
62
62
  TSV.open(CMD.cmd(cmd), :key_field => 1, :type => :single, :cast => :to_i)
63
63
  rescue
64
+ Log.exception $!
64
65
  TSV.setup({}, :type => :single, :cast => :to_i)
65
66
  end
66
67
  end
@@ -106,6 +107,11 @@ module TSV
106
107
  when String
107
108
  raise "Could not open file given by String: #{Misc.fingerprint file}" unless Open.remote?(file) or File.exists? file
108
109
  Open.open(file, open_options)
110
+ when (defined? Step and Step)
111
+ stream = file.get_stream
112
+ stream || get_stream(file.join.path)
113
+ when TSV::Dumper
114
+ file.stream
109
115
  else
110
116
  raise "Cannot get stream from: #{file.inspect}"
111
117
  end
@@ -134,7 +140,7 @@ module TSV
134
140
  sep = (Hash === entry_hash and entry_hash[:sep]) ? entry_hash[:sep] : "\t"
135
141
 
136
142
  str = ""
137
- str << "#: " << Misc.hash2string(entry_hash) << "\n" if entry_hash and entry_hash.any?
143
+ str << "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if entry_hash and entry_hash.any?
138
144
  if fields
139
145
  str << "#" << key_field << sep << fields * sep << "\n"
140
146
  end
@@ -1,9 +1,9 @@
1
1
  require 'lockfile'
2
- require 'net/smtp'
3
2
  require 'digest/md5'
4
3
  require 'cgi'
5
4
  require 'zlib'
6
5
  require 'rubygems/package'
6
+
7
7
  require 'rbbt/util/tar'
8
8
  require 'rbbt/util/misc/exceptions'
9
9
  require 'rbbt/util/misc/concurrent_stream'
@@ -15,769 +15,12 @@ require 'rbbt/util/misc/inspect'
15
15
  require 'rbbt/util/misc/math'
16
16
  require 'rbbt/util/misc/development'
17
17
  require 'rbbt/util/misc/lock'
18
-
19
-
20
- class Hash
21
- def chunked_values_at(keys, max = 5000)
22
- Misc.ordered_divide(keys, max).inject([]) do |acc,c|
23
- new = self.values_at(*c)
24
- new.annotate acc if new.respond_to? :annotate and acc.empty?
25
- acc.concat(new)
26
- end
27
- end
28
- end
29
-
30
- module LaterString
31
- def to_s
32
- yield
33
- end
34
- end
18
+ require 'rbbt/util/misc/options'
19
+ require 'rbbt/util/misc/system'
20
+ require 'rbbt/util/misc/objects'
21
+ require 'rbbt/util/misc/manipulation'
35
22
 
36
23
  module Misc
37
-
38
- def self.parse_cmd_params(str)
39
- return str if Array === str
40
- str.scan(/
41
- (?:["']([^"']*?)["']) |
42
- ([^"'\s]+)
43
- /x).flatten.compact
44
- end
45
-
46
- def self.pid_exists?(pid)
47
- return false if pid.nil?
48
- begin
49
- Process.getpgid(pid.to_i)
50
- true
51
- rescue Errno::ESRCH
52
- false
53
- end
54
- end
55
-
56
- def self.collapse_ranges(ranges)
57
- processed = []
58
- last = nil
59
- final = []
60
- ranges.sort_by{|range| range.begin }.each do |range|
61
- rbegin = range.begin
62
- rend = range.end
63
- if last.nil? or rbegin > last
64
- processed << [rbegin, rend]
65
- last = rend
66
- else
67
- new_processed = []
68
- processed.each do |pbegin,pend|
69
- if pend < rbegin
70
- final << [pbegin, pend]
71
- else
72
- eend = [rend, pend].max
73
- new_processed << [pbegin, eend]
74
- break
75
- end
76
- end
77
- processed = new_processed
78
- last = rend if rend > last
79
- end
80
- end
81
-
82
- final.concat processed
83
- final.collect{|b,e| (b..e)}
84
- end
85
-
86
- def self.total_length(ranges)
87
- Misc.collapse_ranges(ranges).inject(0) do |total,range| total += range.end - range.begin + 1 end
88
- end
89
-
90
- def self.random_sample_in_range(total, size)
91
- p = Set.new
92
-
93
- if size > total / 10
94
- template = (0..total - 1).to_a
95
- size.times do |i|
96
- pos = (rand * (total - i)).floor
97
- if pos == template.length - 1
98
- v = template.pop
99
- else
100
- v, n = template[pos], template[-1]
101
- template.pop
102
- template[pos] = n
103
- end
104
- p << v
105
- end
106
- else
107
- size.times do
108
- pos = nil
109
- while pos.nil?
110
- pos = (rand * total).floor
111
- if p.include? pos
112
- pos = nil
113
- end
114
- end
115
- p << pos
116
- end
117
- end
118
- p
119
- end
120
-
121
- def self.sample(ary, size, replacement = false)
122
- if ary.respond_to? :sample
123
- ary.sample size
124
- else
125
- total = ary.length
126
- p = random_sample_in_range(total, size)
127
- ary.values_at *p
128
- end
129
- end
130
-
131
-
132
- def self.prepare_entity(entity, field, options = {})
133
- return entity unless defined? Entity
134
- return entity unless String === entity or Array === entity
135
- options ||= {}
136
-
137
- dup_array = options.delete :dup_array
138
-
139
- if Annotated === field or Entity.respond_to?(:formats) and Entity.formats.include? field
140
- params = options.dup
141
-
142
- params[:format] ||= params.delete "format"
143
- params.merge!(:format => field) unless params.include?(:format) and not ((f = params[:format]).nil? or (String === f and f.empty?))
144
-
145
- mod = Entity === field ? field : Entity.formats[field]
146
- entity = mod.setup(
147
- ((entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity) ),
148
- params
149
- )
150
- end
151
-
152
- entity
153
- end
154
-
155
- def self.ensembl_server(organism)
156
- date = organism.split("/")[1]
157
- if date.nil?
158
- "www.ensembl.org"
159
- else
160
- "#{ date }.archive.ensembl.org"
161
- end
162
- end
163
-
164
-
165
- def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
166
- name1 ||= "list 1"
167
- name2 ||= "list 2"
168
- name3 ||= "list 3"
169
-
170
- sizes = [list1, list2, list3, list1 & list2, list1 & list3, list2 & list3, list1 & list2 & list3].collect{|l| l.length}
171
-
172
- total = total.length if Array === total
173
-
174
- label = "#{name1}: #{sizes[0]} (#{name2}: #{sizes[3]}, #{name3}: #{sizes[4]})"
175
- label << "|#{name2}: #{sizes[1]} (#{name1}: #{sizes[3]}, #{name3}: #{sizes[5]})"
176
- label << "|#{name3}: #{sizes[2]} (#{name1}: #{sizes[4]}, #{name2}: #{sizes[5]})"
177
- if total
178
- label << "| INTERSECTION: #{sizes[6]} TOTAL: #{total}"
179
- else
180
- label << "| INTERSECTION: #{sizes[6]}"
181
- end
182
-
183
- max = total || sizes.max
184
- sizes = sizes.collect{|v| (v.to_f/max * 100).to_i.to_f / 100}
185
- url = "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{sizes * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
186
- end
187
-
188
- def self.consolidate(list)
189
- list.inject(nil){|acc,e|
190
- if acc.nil?
191
- acc = e
192
- else
193
- acc.concat e
194
- acc
195
- end
196
- }
197
- end
198
-
199
- def self.positional2hash(keys, *values)
200
- if Hash === values.last
201
- extra = values.pop
202
- inputs = Misc.zip2hash(keys, values)
203
- inputs.delete_if{|k,v| v.nil? or (String === v and v.empty?)}
204
- inputs = Misc.add_defaults inputs, extra
205
- inputs.delete_if{|k,v| not keys.include?(k) and not (Symbol === k ? keys.include?(k.to_s) : keys.include?(k.to_sym))}
206
- inputs
207
- else
208
- Misc.zip2hash(keys, values)
209
- end
210
- end
211
-
212
- def self.send_email(from, to, subject, message, options = {})
213
- IndiferentHash.setup(options)
214
- options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login
215
-
216
- server, port, user, pass, from_alias, to_alias, auth = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth
217
-
218
- msg = <<-END_OF_MESSAGE
219
- From: #{from_alias} <#{from}>
220
- To: #{to_alias} <#{to}>
221
- Subject: #{subject}
222
-
223
- #{message}
224
- END_OF_MESSAGE
225
-
226
- Net::SMTP.start(server, port, server, user, pass, auth) do |smtp|
227
- smtp.send_message msg, from, to
228
- end
229
- end
230
-
231
- def self.counts(array)
232
- counts = {}
233
- array.each do |e|
234
- counts[e] ||= 0
235
- counts[e] += 1
236
- end
237
-
238
- counts
239
- end
240
-
241
- def self.proportions(array)
242
- total = array.length
243
-
244
- proportions = Hash.new 0
245
-
246
- array.each do |e|
247
- proportions[e] += 1.0 / total
248
- end
249
-
250
- class << proportions; self;end.class_eval do
251
- def to_s
252
- sort{|a,b| a[1] == b[1] ? a[0] <=> b[0] : a[1] <=> b[1]}.collect{|k,c| "%3d\t%s" % [c, k]} * "\n"
253
- end
254
- end
255
-
256
- proportions
257
- end
258
-
259
-
260
- def self.sorted_array_hits(a1, a2)
261
- e1, e2 = a1.shift, a2.shift
262
- counter = 0
263
- match = []
264
- while true
265
- break if e1.nil? or e2.nil?
266
- case e1 <=> e2
267
- when 0
268
- match << counter
269
- e1, e2 = a1.shift, a2.shift
270
- counter += 1
271
- when -1
272
- while not e1.nil? and e1 < e2
273
- e1 = a1.shift
274
- counter += 1
275
- end
276
- when 1
277
- e2 = a2.shift
278
- e2 = a2.shift while not e2.nil? and e2 < e1
279
- end
280
- end
281
- match
282
- end
283
-
284
- def self.intersect_sorted_arrays(a1, a2)
285
- e1, e2 = a1.shift, a2.shift
286
- intersect = []
287
- while true
288
- break if e1.nil? or e2.nil?
289
- case e1 <=> e2
290
- when 0
291
- intersect << e1
292
- e1, e2 = a1.shift, a2.shift
293
- when -1
294
- e1 = a1.shift while not e1.nil? and e1 < e2
295
- when 1
296
- e2 = a2.shift
297
- e2 = a2.shift while not e2.nil? and e2 < e1
298
- end
299
- end
300
- intersect
301
- end
302
-
303
- def self.merge_sorted_arrays(a1, a2)
304
- e1, e2 = a1.shift, a2.shift
305
- new = []
306
- while true
307
- case
308
- when (e1 and e2)
309
- case e1 <=> e2
310
- when 0
311
- new << e1
312
- e1, e2 = a1.shift, a2.shift
313
- when -1
314
- new << e1
315
- e1 = a1.shift
316
- when 1
317
- new << e2
318
- e2 = a2.shift
319
- end
320
- when e2
321
- new << e2
322
- new.concat a2
323
- break
324
- when e1
325
- new << e1
326
- new.concat a1
327
- break
328
- else
329
- break
330
- end
331
- end
332
- new
333
- end
334
-
335
- def self.binary_include?(array, elem)
336
- upper = array.size - 1
337
- lower = 0
338
-
339
- return -1 if upper < lower
340
-
341
- while(upper >= lower) do
342
- idx = lower + (upper - lower) / 2
343
- value = array[idx]
344
-
345
- case elem <=> value
346
- when 0
347
- return true
348
- when -1
349
- upper = idx - 1
350
- when 1
351
- lower = idx + 1
352
- else
353
- raise "Cannot compare #{[elem.inspect, value.inspect] * " with "}"
354
- end
355
- end
356
-
357
- return false
358
- end
359
-
360
-
361
-
362
- def self.array2hash(array, default = nil)
363
- hash = {}
364
- array.each do |key, value|
365
- value = default.dup if value.nil? and not default.nil?
366
- hash[key] = value
367
- end
368
- hash
369
- end
370
-
371
- def self.zip2hash(list1, list2)
372
- hash = {}
373
- list1.each_with_index do |e,i|
374
- hash[e] = list2[i]
375
- end
376
- hash
377
- end
378
-
379
- def self.process_to_hash(list)
380
- result = yield list
381
- zip2hash(list, result)
382
- end
383
-
384
- def self.env_add(var, value, sep = ":", prepend = true)
385
- ENV[var] ||= ""
386
- return if ENV[var] =~ /(#{sep}|^)#{Regexp.quote value}(#{sep}|$)/
387
- if prepend
388
- ENV[var] = value + sep + ENV[var]
389
- else
390
- ENV[var] += sep + ENV[var]
391
- end
392
- end
393
-
394
- def self.do_once(&block)
395
- return nil if $__did_once
396
- $__did_once = true
397
- yield
398
- nil
399
- end
400
-
401
- def self.reset_do_once
402
- $__did_once = false
403
- end
404
-
405
- def self.insist(times = 3, sleep = nil, msg = nil)
406
- if Array === times
407
- sleep_array = times
408
- times = sleep_array.length
409
- sleep = sleep_array.shift
410
- end
411
- try = 0
412
- begin
413
- yield
414
- rescue
415
- if msg
416
- Log.warn("Insisting after exception: #{$!.message} -- #{msg}")
417
- else
418
- Log.warn("Insisting after exception: #{$!.message}")
419
- end
420
- if sleep and try > 0
421
- sleep sleep
422
- sleep = sleep_array.shift if sleep_array
423
- else
424
- Thread.pass
425
- end
426
- try += 1
427
- retry if try < times
428
- raise $!
429
- end
430
- end
431
-
432
- def self.try3times(&block)
433
- insist(3, &block)
434
- end
435
-
436
- def self.hash2string(hash)
437
- hash.sort_by{|k,v| k.to_s}.collect{|k,v|
438
- next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
439
- [ Symbol === k ? ":" << k.to_s : k,
440
- Symbol === v ? ":" << v.to_s : v] * "="
441
- }.compact * "#"
442
- end
443
-
444
- def self.GET_params2hash(string)
445
- hash = {}
446
- string.split('&').collect{|item|
447
- key, value = item.split("=").values_at 0, 1
448
- hash[key] = value.nil? ? "" : CGI.unescape(value)
449
- }
450
- hash
451
- end
452
-
453
- def self.hash2GET_params(hash)
454
- hash.sort_by{|k,v| k.to_s}.collect{|k,v|
455
- next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object Array).include? v.class.to_s
456
- v = case
457
- when Symbol === v
458
- v.to_s
459
- when Array === v
460
- v * ","
461
- else
462
- CGI.escape(v.to_s)
463
- end
464
- [ Symbol === k ? k.to_s : k, v] * "="
465
- }.compact * "&"
466
- end
467
-
468
- def self.hash_to_html_tag_attributes(hash)
469
- return "" if hash.nil? or hash.empty?
470
- hash.collect{|k,v|
471
- case
472
- when (k.nil? or v.nil? or (String === v and v.empty?))
473
- nil
474
- when Array === v
475
- [k,"'" << v * " " << "'"] * "="
476
- when String === v
477
- [k,"'" << v << "'"] * "="
478
- when Symbol === v
479
- [k,"'" << v.to_s << "'"] * "="
480
- when TrueClass === v
481
- [k,"'" << v.to_s << "'"] * "="
482
- when (Fixnum === v or Float === v)
483
- [k,"'" << v.to_s << "'"] * "="
484
- else
485
- nil
486
- end
487
- }.compact * " "
488
- end
489
-
490
- def self.html_tag(tag, content = nil, params = {})
491
- attr_str = hash_to_html_tag_attributes(params)
492
- attr_str = " " << attr_str if String === attr_str and attr_str != ""
493
- html = if content.nil?
494
- "<#{ tag }#{attr_str}/>"
495
- else
496
- "<#{ tag }#{attr_str}>#{ content }</#{ tag }>"
497
- end
498
-
499
- html
500
- end
501
-
502
- def self.path_relative_to(basedir, path)
503
- path = File.expand_path(path) unless path[0] == "/"
504
- basedir = File.expand_path(basedir) unless basedir[0] == "/"
505
-
506
- if path.index(basedir) == 0
507
- if basedir[-1] == "/"
508
- return path[basedir.length..-1]
509
- else
510
- return path[basedir.length+1..-1]
511
- end
512
- else
513
- return nil
514
- end
515
- end
516
-
517
- def self.hostname
518
- @hostanem ||= `hostname`.strip
519
- end
520
-
521
-
522
- def self.common_path(dir, file)
523
- file = File.expand_path file
524
- dir = File.expand_path dir
525
-
526
- return true if file == dir
527
- while File.dirname(file) != file
528
- file = File.dirname(file)
529
- return true if file == dir
530
- end
531
-
532
- return false
533
- end
534
-
535
- # WARN: probably not thread safe...
536
- def self.in_dir(dir)
537
- old_pwd = FileUtils.pwd
538
- res = nil
539
- begin
540
- FileUtils.mkdir_p dir unless File.exists? dir
541
- FileUtils.cd dir
542
- res = yield
543
- ensure
544
- FileUtils.cd old_pwd
545
- end
546
- res
547
- end
548
-
549
-
550
- def self.add_defaults(options, defaults = {})
551
- case
552
- when Hash === options
553
- new_options = options.dup
554
- when String === options
555
- new_options = string2hash options
556
- else
557
- raise "Format of '#{options.inspect}' not understood. It should be a hash"
558
- end
559
-
560
- defaults.each do |key, value|
561
- next if options.include? key
562
-
563
- new_options[key] = value
564
- end
565
-
566
- new_options
567
- end
568
-
569
- def self.process_options(hash, *keys)
570
- if keys.length == 1
571
- hash.include?(keys.first.to_sym) ? hash.delete(keys.first.to_sym) : hash.delete(keys.first.to_s)
572
- else
573
- keys.collect do |key| hash.include?(key.to_sym) ? hash.delete(key.to_sym) : hash.delete(key.to_s) end
574
- end
575
- end
576
-
577
- def self.pull_keys(hash, prefix)
578
- new = {}
579
- hash.keys.each do |key|
580
- if key.to_s =~ /#{ prefix }_(.*)/
581
- case
582
- when String === key
583
- new[$1] = hash.delete key
584
- when Symbol === key
585
- new[$1.to_sym] = hash.delete key
586
- end
587
- else
588
- if key.to_s == prefix.to_s
589
- new[key] = hash.delete key
590
- end
591
- end
592
- end
593
-
594
- new
595
- end
596
-
597
- def self.string2const(string)
598
- return nil if string.nil?
599
- mod = Kernel
600
-
601
- string.to_s.split('::').each do |str|
602
- mod = mod.const_get str
603
- end
604
-
605
- mod
606
- end
607
-
608
- def self.string2hash_old(string)
609
-
610
- options = {}
611
- string.split(/#/).each do |str|
612
- if str.match(/(.*)=(.*)/)
613
- option, value = $1, $2
614
- else
615
- option, value = str, true
616
- end
617
-
618
- option = option.sub(":",'').to_sym if option.chars.first == ':'
619
- value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
620
-
621
- if value == true
622
- options[option] = option.to_s.chars.first != '!'
623
- else
624
- options[option] = Thread.start do
625
- $SAFE = 0;
626
- case
627
- when value =~ /^(?:true|T)$/i
628
- true
629
- when value =~ /^(?:false|F)$/i
630
- false
631
- when Symbol === value
632
- value
633
- when (String === value and value =~ /^\/(.*)\/$/)
634
- Regexp.new /#{$1}/
635
- else
636
- begin
637
- Kernel.const_get value
638
- rescue
639
- begin
640
- raise if value =~ /[a-z]/ and defined? value
641
- eval(value)
642
- rescue Exception
643
- value
644
- end
645
- end
646
- end
647
- end.value
648
- end
649
- end
650
-
651
- options
652
- end
653
-
654
- def self.string2hash(string)
655
- options = {}
656
-
657
- string.split('#').each do |str|
658
- key, sep, value = str.partition "="
659
-
660
- key = key[1..-1].to_sym if key[0] == ":"
661
-
662
- options[key] = true and next if value.empty?
663
- options[key] = value[1..-1].to_sym and next if value[0] == ":"
664
- options[key] = Regexp.new(/#{value[1..-2]}/) and next if value[0] == "/" and value[-1] == "/"
665
- options[key] = value[1..-2] and next if value =~ /^['"].*['"]$/
666
- options[key] = value.to_i and next if value =~ /^\d+$/
667
- options[key] = value.to_f and next if value =~ /^\d*\.\d+$/
668
- options[key] = true and next if value == "true"
669
- options[key] = false and next if value == "false"
670
- options[key] = value and next
671
-
672
- options[key] = begin
673
- saved_safe = $SAFE
674
- $SAFE = 0
675
- eval(value)
676
- rescue Exception
677
- value
678
- ensure
679
- $SAFE = saved_safe
680
- end
681
- end
682
-
683
- return options
684
-
685
- options = {}
686
- string.split(/#/).each do |str|
687
- if str.match(/(.*)=(.*)/)
688
- option, value = $1, $2
689
- else
690
- option, value = str, true
691
- end
692
-
693
- option = option.sub(":",'').to_sym if option.chars.first == ':'
694
- value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
695
-
696
- if value == true
697
- options[option] = option.to_s.chars.first != '!'
698
- else
699
- options[option] = Thread.start do
700
- $SAFE = 0;
701
- case
702
- when value =~ /^(?:true|T)$/i
703
- true
704
- when value =~ /^(?:false|F)$/i
705
- false
706
- when Symbol === value
707
- value
708
- when (String === value and value =~ /^\/(.*)\/$/)
709
- Regexp.new /#{$1}/
710
- else
711
- begin
712
- Kernel.const_get value
713
- rescue
714
- begin
715
- raise if value =~ /[a-z]/ and defined? value
716
- eval(value)
717
- rescue Exception
718
- value
719
- end
720
- end
721
- end
722
- end.value
723
- end
724
- end
725
-
726
- options
727
- end
728
-
729
- def self.field_position(fields, field, quiet = false)
730
- return field if Integer === field or Range === field
731
- raise FieldNotFoundError, "Field information missing" if fields.nil? && ! quiet
732
- fields.each_with_index{|f,i| return i if f == field}
733
- field_re = Regexp.new /^#{field}$/i
734
- fields.each_with_index{|f,i| return i if f =~ field_re}
735
- raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
736
- end
737
-
738
- # Divides the array into +num+ chunks of the same size by placing one
739
- # element in each chunk iteratively.
740
- def self.divide(array, num)
741
- num = 1 if num == 0
742
- chunks = []
743
- num.to_i.times do chunks << [] end
744
- array.each_with_index{|e, i|
745
- c = i % num
746
- chunks[c] << e
747
- }
748
- chunks
749
- end
750
-
751
- # Divides the array into chunks of +num+ same size by placing one
752
- # element in each chunk iteratively.
753
- def self.ordered_divide(array, num)
754
- last = array.length - 1
755
- chunks = []
756
- current = 0
757
- while current <= last
758
- next_current = [last, current + num - 1].min
759
- chunks << array[current..next_current]
760
- current = next_current + 1
761
- end
762
- chunks
763
- end
764
-
765
- def self.append_zipped(current, new)
766
- current.each do |v|
767
- n = new.shift
768
- if Array === n
769
- v.concat new
770
- else
771
- v << n
772
- end
773
- end
774
- current
775
- end
776
-
777
- def self.zip_fields(array)
778
- return [] if array.empty? or (first = array.first).nil?
779
- first.zip(*array[1..-1])
780
- end
781
24
  end
782
25
 
783
26
  module PDF2Text