rbbt-util 5.11.9 → 5.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2dfad08e355d768565decb173f5a86198c3600e2
4
- data.tar.gz: 64421ddf30a3066b1313c1762a9c1b41cb9a719b
3
+ metadata.gz: fea2c562a8e3611c8c9767589ec160a36c1b988d
4
+ data.tar.gz: 42c868b4354a14c64c9eb5ea45b4d447c3d1c757
5
5
  SHA512:
6
- metadata.gz: f415797c4922dea7a9f3ecc8ff636cd3681da6f22b0111c34e1210e9beed4430c8c17c4787e9fe5ad4ad2e63d014be2308a533d220f87e2f40224b9d91daadaa
7
- data.tar.gz: 8b7ad6d7440ece8580bab37f20c6e7e86158953a73dce168816b26e6003cb9966dd40359a798a0eb4abc152393c86c922f93ce3d2a27c272a47ebbbba5907d35
6
+ metadata.gz: c85bd1f0ed345e6277daed12a41145134f81914e5d1b678db4676296dcb9a8e1ae1bc66c0b3dcc866dc9cab1ebcb80d50cb7181f4e71d755ecb721e3dffa9f55
7
+ data.tar.gz: fd38d3d2ef666f8b8b4eae6ce66b8ce45b8c5cb6a57e3bbc28c2900f239495a9f991080026e09dc7fbaca480518095e426f482afcc104d07f656835b80509bf2
data/lib/rbbt/persist.rb CHANGED
@@ -465,7 +465,7 @@ module Persist
465
465
  res.abort if res.respond_to? :abort
466
466
  raise $!
467
467
  end
468
- when TSV::Dumper
468
+ when (defined? TSV and TSV::Dumper)
469
469
  begin
470
470
  io = res.stream
471
471
  res = TSV.open(io)
data/lib/rbbt/tsv.rb CHANGED
@@ -15,6 +15,7 @@ require 'rbbt/tsv/attach'
15
15
  require 'rbbt/tsv/filter'
16
16
  require 'rbbt/tsv/field_index'
17
17
  require 'rbbt/tsv/parallel'
18
+ require 'rbbt/tsv/stream'
18
19
 
19
20
  module TSV
20
21
  class << self
@@ -64,8 +64,7 @@ module TSV
64
64
  end
65
65
 
66
66
  def close_out
67
- Log.debug "Close out #{@stream.inspect}"
68
- @stream.close
67
+ @stream.close unless @stream.closed?
69
68
  end
70
69
 
71
70
  def close_in
@@ -69,8 +69,8 @@ module TSV
69
69
  def self.traverse_io_array(io, options = {}, &block)
70
70
  callback = Misc.process_options options, :callback
71
71
  if callback
72
- while not io.eof?
73
- res = yield io.gets.strip
72
+ while line = io.gets
73
+ res = yield line.strip
74
74
  callback.call res
75
75
  end
76
76
  else
@@ -65,7 +65,7 @@ module TSV
65
65
  end
66
66
 
67
67
  def chop_line(line)
68
- line.split(@sep, -1)
68
+ @sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
69
69
  end
70
70
 
71
71
  def get_values_single_from_flat(parts)
@@ -448,8 +448,12 @@ module TSV
448
448
  def options
449
449
  options = {}
450
450
  TSV::ENTRIES.each do |entry|
451
- options[entry.to_sym] = self.send(entry) if self.respond_to? entry
451
+ if self.respond_to? entry
452
+ value = self.send(entry)
453
+ options[entry.to_sym] = value unless value.nil?
454
+ end
452
455
  end
456
+ options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
453
457
  IndiferentHash.setup options
454
458
  end
455
459
 
@@ -0,0 +1,55 @@
1
+ require 'rbbt/tsv/parser'
2
+ require 'rbbt/tsv/dumper'
3
+ module TSV
4
+
5
+ def self.collapse_stream(input, options = {})
6
+ options = Misc.add_defaults options, :sep => "\t"
7
+ input_stream = TSV.get_stream input
8
+
9
+ sorted_input_stream = Misc.sort_stream input_stream
10
+
11
+ parser = TSV::Parser.new sorted_input_stream, options.dup
12
+ dumper = TSV::Dumper.new parser
13
+ header = TSV.header_lines(parser.key_field, parser.fields, parser.options)
14
+ dumper.close_in
15
+ dumper.close_out
16
+ dumper.stream = Misc.collapse_stream parser.stream, parser.first_line, parser.sep, header
17
+ dumper
18
+ end
19
+
20
+ def self.paste_streams(inputs, options = {})
21
+ options = Misc.add_defaults options, :sep => "\t", :sort => false
22
+ sort = Misc.process_options options, :sort
23
+
24
+ input_streams = []
25
+ input_lines = []
26
+ input_fields = []
27
+ input_key_fields = []
28
+ input_options = []
29
+
30
+ input_source_streams = inputs.collect do |input|
31
+ stream = TSV.get_stream input
32
+ stream = sort ? Misc.sort_stream(stream) : stream
33
+ end
34
+
35
+ input_source_streams.each do |stream|
36
+ parser = TSV::Parser.new stream, options
37
+ input_streams << parser.stream
38
+ input_lines << parser.first_line
39
+ input_fields << parser.fields
40
+ input_key_fields << parser.key_field
41
+ input_options << parser.options
42
+ end
43
+
44
+ key_field = input_key_fields.first
45
+ fields = input_fields.flatten
46
+ options = options.merge(input_options.first)
47
+
48
+ dumper = TSV::Dumper.new options.merge(:key_field => key_field, :fields => fields)
49
+ dumper.close_in
50
+ dumper.close_out
51
+ header = TSV.header_lines(key_field, fields, options)
52
+ dumper.stream = Misc.paste_streams input_streams, input_lines, options[:sep], header
53
+ dumper
54
+ end
55
+ end
data/lib/rbbt/tsv/util.rb CHANGED
@@ -61,6 +61,7 @@ module TSV
61
61
  begin
62
62
  TSV.open(CMD.cmd(cmd), :key_field => 1, :type => :single, :cast => :to_i)
63
63
  rescue
64
+ Log.exception $!
64
65
  TSV.setup({}, :type => :single, :cast => :to_i)
65
66
  end
66
67
  end
@@ -106,6 +107,11 @@ module TSV
106
107
  when String
107
108
  raise "Could not open file given by String: #{Misc.fingerprint file}" unless Open.remote?(file) or File.exists? file
108
109
  Open.open(file, open_options)
110
+ when (defined? Step and Step)
111
+ stream = file.get_stream
112
+ stream || get_stream(file.join.path)
113
+ when TSV::Dumper
114
+ file.stream
109
115
  else
110
116
  raise "Cannot get stream from: #{file.inspect}"
111
117
  end
@@ -134,7 +140,7 @@ module TSV
134
140
  sep = (Hash === entry_hash and entry_hash[:sep]) ? entry_hash[:sep] : "\t"
135
141
 
136
142
  str = ""
137
- str << "#: " << Misc.hash2string(entry_hash) << "\n" if entry_hash and entry_hash.any?
143
+ str << "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if entry_hash and entry_hash.any?
138
144
  if fields
139
145
  str << "#" << key_field << sep << fields * sep << "\n"
140
146
  end
@@ -1,9 +1,9 @@
1
1
  require 'lockfile'
2
- require 'net/smtp'
3
2
  require 'digest/md5'
4
3
  require 'cgi'
5
4
  require 'zlib'
6
5
  require 'rubygems/package'
6
+
7
7
  require 'rbbt/util/tar'
8
8
  require 'rbbt/util/misc/exceptions'
9
9
  require 'rbbt/util/misc/concurrent_stream'
@@ -15,769 +15,12 @@ require 'rbbt/util/misc/inspect'
15
15
  require 'rbbt/util/misc/math'
16
16
  require 'rbbt/util/misc/development'
17
17
  require 'rbbt/util/misc/lock'
18
-
19
-
20
- class Hash
21
- def chunked_values_at(keys, max = 5000)
22
- Misc.ordered_divide(keys, max).inject([]) do |acc,c|
23
- new = self.values_at(*c)
24
- new.annotate acc if new.respond_to? :annotate and acc.empty?
25
- acc.concat(new)
26
- end
27
- end
28
- end
29
-
30
- module LaterString
31
- def to_s
32
- yield
33
- end
34
- end
18
+ require 'rbbt/util/misc/options'
19
+ require 'rbbt/util/misc/system'
20
+ require 'rbbt/util/misc/objects'
21
+ require 'rbbt/util/misc/manipulation'
35
22
 
36
23
  module Misc
37
-
38
- def self.parse_cmd_params(str)
39
- return str if Array === str
40
- str.scan(/
41
- (?:["']([^"']*?)["']) |
42
- ([^"'\s]+)
43
- /x).flatten.compact
44
- end
45
-
46
- def self.pid_exists?(pid)
47
- return false if pid.nil?
48
- begin
49
- Process.getpgid(pid.to_i)
50
- true
51
- rescue Errno::ESRCH
52
- false
53
- end
54
- end
55
-
56
- def self.collapse_ranges(ranges)
57
- processed = []
58
- last = nil
59
- final = []
60
- ranges.sort_by{|range| range.begin }.each do |range|
61
- rbegin = range.begin
62
- rend = range.end
63
- if last.nil? or rbegin > last
64
- processed << [rbegin, rend]
65
- last = rend
66
- else
67
- new_processed = []
68
- processed.each do |pbegin,pend|
69
- if pend < rbegin
70
- final << [pbegin, pend]
71
- else
72
- eend = [rend, pend].max
73
- new_processed << [pbegin, eend]
74
- break
75
- end
76
- end
77
- processed = new_processed
78
- last = rend if rend > last
79
- end
80
- end
81
-
82
- final.concat processed
83
- final.collect{|b,e| (b..e)}
84
- end
85
-
86
- def self.total_length(ranges)
87
- Misc.collapse_ranges(ranges).inject(0) do |total,range| total += range.end - range.begin + 1 end
88
- end
89
-
90
- def self.random_sample_in_range(total, size)
91
- p = Set.new
92
-
93
- if size > total / 10
94
- template = (0..total - 1).to_a
95
- size.times do |i|
96
- pos = (rand * (total - i)).floor
97
- if pos == template.length - 1
98
- v = template.pop
99
- else
100
- v, n = template[pos], template[-1]
101
- template.pop
102
- template[pos] = n
103
- end
104
- p << v
105
- end
106
- else
107
- size.times do
108
- pos = nil
109
- while pos.nil?
110
- pos = (rand * total).floor
111
- if p.include? pos
112
- pos = nil
113
- end
114
- end
115
- p << pos
116
- end
117
- end
118
- p
119
- end
120
-
121
- def self.sample(ary, size, replacement = false)
122
- if ary.respond_to? :sample
123
- ary.sample size
124
- else
125
- total = ary.length
126
- p = random_sample_in_range(total, size)
127
- ary.values_at *p
128
- end
129
- end
130
-
131
-
132
- def self.prepare_entity(entity, field, options = {})
133
- return entity unless defined? Entity
134
- return entity unless String === entity or Array === entity
135
- options ||= {}
136
-
137
- dup_array = options.delete :dup_array
138
-
139
- if Annotated === field or Entity.respond_to?(:formats) and Entity.formats.include? field
140
- params = options.dup
141
-
142
- params[:format] ||= params.delete "format"
143
- params.merge!(:format => field) unless params.include?(:format) and not ((f = params[:format]).nil? or (String === f and f.empty?))
144
-
145
- mod = Entity === field ? field : Entity.formats[field]
146
- entity = mod.setup(
147
- ((entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity) ),
148
- params
149
- )
150
- end
151
-
152
- entity
153
- end
154
-
155
- def self.ensembl_server(organism)
156
- date = organism.split("/")[1]
157
- if date.nil?
158
- "www.ensembl.org"
159
- else
160
- "#{ date }.archive.ensembl.org"
161
- end
162
- end
163
-
164
-
165
- def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
166
- name1 ||= "list 1"
167
- name2 ||= "list 2"
168
- name3 ||= "list 3"
169
-
170
- sizes = [list1, list2, list3, list1 & list2, list1 & list3, list2 & list3, list1 & list2 & list3].collect{|l| l.length}
171
-
172
- total = total.length if Array === total
173
-
174
- label = "#{name1}: #{sizes[0]} (#{name2}: #{sizes[3]}, #{name3}: #{sizes[4]})"
175
- label << "|#{name2}: #{sizes[1]} (#{name1}: #{sizes[3]}, #{name3}: #{sizes[5]})"
176
- label << "|#{name3}: #{sizes[2]} (#{name1}: #{sizes[4]}, #{name2}: #{sizes[5]})"
177
- if total
178
- label << "| INTERSECTION: #{sizes[6]} TOTAL: #{total}"
179
- else
180
- label << "| INTERSECTION: #{sizes[6]}"
181
- end
182
-
183
- max = total || sizes.max
184
- sizes = sizes.collect{|v| (v.to_f/max * 100).to_i.to_f / 100}
185
- url = "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{sizes * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
186
- end
187
-
188
- def self.consolidate(list)
189
- list.inject(nil){|acc,e|
190
- if acc.nil?
191
- acc = e
192
- else
193
- acc.concat e
194
- acc
195
- end
196
- }
197
- end
198
-
199
- def self.positional2hash(keys, *values)
200
- if Hash === values.last
201
- extra = values.pop
202
- inputs = Misc.zip2hash(keys, values)
203
- inputs.delete_if{|k,v| v.nil? or (String === v and v.empty?)}
204
- inputs = Misc.add_defaults inputs, extra
205
- inputs.delete_if{|k,v| not keys.include?(k) and not (Symbol === k ? keys.include?(k.to_s) : keys.include?(k.to_sym))}
206
- inputs
207
- else
208
- Misc.zip2hash(keys, values)
209
- end
210
- end
211
-
212
- def self.send_email(from, to, subject, message, options = {})
213
- IndiferentHash.setup(options)
214
- options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login
215
-
216
- server, port, user, pass, from_alias, to_alias, auth = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth
217
-
218
- msg = <<-END_OF_MESSAGE
219
- From: #{from_alias} <#{from}>
220
- To: #{to_alias} <#{to}>
221
- Subject: #{subject}
222
-
223
- #{message}
224
- END_OF_MESSAGE
225
-
226
- Net::SMTP.start(server, port, server, user, pass, auth) do |smtp|
227
- smtp.send_message msg, from, to
228
- end
229
- end
230
-
231
- def self.counts(array)
232
- counts = {}
233
- array.each do |e|
234
- counts[e] ||= 0
235
- counts[e] += 1
236
- end
237
-
238
- counts
239
- end
240
-
241
- def self.proportions(array)
242
- total = array.length
243
-
244
- proportions = Hash.new 0
245
-
246
- array.each do |e|
247
- proportions[e] += 1.0 / total
248
- end
249
-
250
- class << proportions; self;end.class_eval do
251
- def to_s
252
- sort{|a,b| a[1] == b[1] ? a[0] <=> b[0] : a[1] <=> b[1]}.collect{|k,c| "%3d\t%s" % [c, k]} * "\n"
253
- end
254
- end
255
-
256
- proportions
257
- end
258
-
259
-
260
- def self.sorted_array_hits(a1, a2)
261
- e1, e2 = a1.shift, a2.shift
262
- counter = 0
263
- match = []
264
- while true
265
- break if e1.nil? or e2.nil?
266
- case e1 <=> e2
267
- when 0
268
- match << counter
269
- e1, e2 = a1.shift, a2.shift
270
- counter += 1
271
- when -1
272
- while not e1.nil? and e1 < e2
273
- e1 = a1.shift
274
- counter += 1
275
- end
276
- when 1
277
- e2 = a2.shift
278
- e2 = a2.shift while not e2.nil? and e2 < e1
279
- end
280
- end
281
- match
282
- end
283
-
284
- def self.intersect_sorted_arrays(a1, a2)
285
- e1, e2 = a1.shift, a2.shift
286
- intersect = []
287
- while true
288
- break if e1.nil? or e2.nil?
289
- case e1 <=> e2
290
- when 0
291
- intersect << e1
292
- e1, e2 = a1.shift, a2.shift
293
- when -1
294
- e1 = a1.shift while not e1.nil? and e1 < e2
295
- when 1
296
- e2 = a2.shift
297
- e2 = a2.shift while not e2.nil? and e2 < e1
298
- end
299
- end
300
- intersect
301
- end
302
-
303
- def self.merge_sorted_arrays(a1, a2)
304
- e1, e2 = a1.shift, a2.shift
305
- new = []
306
- while true
307
- case
308
- when (e1 and e2)
309
- case e1 <=> e2
310
- when 0
311
- new << e1
312
- e1, e2 = a1.shift, a2.shift
313
- when -1
314
- new << e1
315
- e1 = a1.shift
316
- when 1
317
- new << e2
318
- e2 = a2.shift
319
- end
320
- when e2
321
- new << e2
322
- new.concat a2
323
- break
324
- when e1
325
- new << e1
326
- new.concat a1
327
- break
328
- else
329
- break
330
- end
331
- end
332
- new
333
- end
334
-
335
- def self.binary_include?(array, elem)
336
- upper = array.size - 1
337
- lower = 0
338
-
339
- return -1 if upper < lower
340
-
341
- while(upper >= lower) do
342
- idx = lower + (upper - lower) / 2
343
- value = array[idx]
344
-
345
- case elem <=> value
346
- when 0
347
- return true
348
- when -1
349
- upper = idx - 1
350
- when 1
351
- lower = idx + 1
352
- else
353
- raise "Cannot compare #{[elem.inspect, value.inspect] * " with "}"
354
- end
355
- end
356
-
357
- return false
358
- end
359
-
360
-
361
-
362
- def self.array2hash(array, default = nil)
363
- hash = {}
364
- array.each do |key, value|
365
- value = default.dup if value.nil? and not default.nil?
366
- hash[key] = value
367
- end
368
- hash
369
- end
370
-
371
- def self.zip2hash(list1, list2)
372
- hash = {}
373
- list1.each_with_index do |e,i|
374
- hash[e] = list2[i]
375
- end
376
- hash
377
- end
378
-
379
- def self.process_to_hash(list)
380
- result = yield list
381
- zip2hash(list, result)
382
- end
383
-
384
- def self.env_add(var, value, sep = ":", prepend = true)
385
- ENV[var] ||= ""
386
- return if ENV[var] =~ /(#{sep}|^)#{Regexp.quote value}(#{sep}|$)/
387
- if prepend
388
- ENV[var] = value + sep + ENV[var]
389
- else
390
- ENV[var] += sep + ENV[var]
391
- end
392
- end
393
-
394
- def self.do_once(&block)
395
- return nil if $__did_once
396
- $__did_once = true
397
- yield
398
- nil
399
- end
400
-
401
- def self.reset_do_once
402
- $__did_once = false
403
- end
404
-
405
- def self.insist(times = 3, sleep = nil, msg = nil)
406
- if Array === times
407
- sleep_array = times
408
- times = sleep_array.length
409
- sleep = sleep_array.shift
410
- end
411
- try = 0
412
- begin
413
- yield
414
- rescue
415
- if msg
416
- Log.warn("Insisting after exception: #{$!.message} -- #{msg}")
417
- else
418
- Log.warn("Insisting after exception: #{$!.message}")
419
- end
420
- if sleep and try > 0
421
- sleep sleep
422
- sleep = sleep_array.shift if sleep_array
423
- else
424
- Thread.pass
425
- end
426
- try += 1
427
- retry if try < times
428
- raise $!
429
- end
430
- end
431
-
432
- def self.try3times(&block)
433
- insist(3, &block)
434
- end
435
-
436
- def self.hash2string(hash)
437
- hash.sort_by{|k,v| k.to_s}.collect{|k,v|
438
- next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
439
- [ Symbol === k ? ":" << k.to_s : k,
440
- Symbol === v ? ":" << v.to_s : v] * "="
441
- }.compact * "#"
442
- end
443
-
444
- def self.GET_params2hash(string)
445
- hash = {}
446
- string.split('&').collect{|item|
447
- key, value = item.split("=").values_at 0, 1
448
- hash[key] = value.nil? ? "" : CGI.unescape(value)
449
- }
450
- hash
451
- end
452
-
453
- def self.hash2GET_params(hash)
454
- hash.sort_by{|k,v| k.to_s}.collect{|k,v|
455
- next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object Array).include? v.class.to_s
456
- v = case
457
- when Symbol === v
458
- v.to_s
459
- when Array === v
460
- v * ","
461
- else
462
- CGI.escape(v.to_s)
463
- end
464
- [ Symbol === k ? k.to_s : k, v] * "="
465
- }.compact * "&"
466
- end
467
-
468
- def self.hash_to_html_tag_attributes(hash)
469
- return "" if hash.nil? or hash.empty?
470
- hash.collect{|k,v|
471
- case
472
- when (k.nil? or v.nil? or (String === v and v.empty?))
473
- nil
474
- when Array === v
475
- [k,"'" << v * " " << "'"] * "="
476
- when String === v
477
- [k,"'" << v << "'"] * "="
478
- when Symbol === v
479
- [k,"'" << v.to_s << "'"] * "="
480
- when TrueClass === v
481
- [k,"'" << v.to_s << "'"] * "="
482
- when (Fixnum === v or Float === v)
483
- [k,"'" << v.to_s << "'"] * "="
484
- else
485
- nil
486
- end
487
- }.compact * " "
488
- end
489
-
490
- def self.html_tag(tag, content = nil, params = {})
491
- attr_str = hash_to_html_tag_attributes(params)
492
- attr_str = " " << attr_str if String === attr_str and attr_str != ""
493
- html = if content.nil?
494
- "<#{ tag }#{attr_str}/>"
495
- else
496
- "<#{ tag }#{attr_str}>#{ content }</#{ tag }>"
497
- end
498
-
499
- html
500
- end
501
-
502
- def self.path_relative_to(basedir, path)
503
- path = File.expand_path(path) unless path[0] == "/"
504
- basedir = File.expand_path(basedir) unless basedir[0] == "/"
505
-
506
- if path.index(basedir) == 0
507
- if basedir[-1] == "/"
508
- return path[basedir.length..-1]
509
- else
510
- return path[basedir.length+1..-1]
511
- end
512
- else
513
- return nil
514
- end
515
- end
516
-
517
- def self.hostname
518
- @hostanem ||= `hostname`.strip
519
- end
520
-
521
-
522
- def self.common_path(dir, file)
523
- file = File.expand_path file
524
- dir = File.expand_path dir
525
-
526
- return true if file == dir
527
- while File.dirname(file) != file
528
- file = File.dirname(file)
529
- return true if file == dir
530
- end
531
-
532
- return false
533
- end
534
-
535
- # WARN: probably not thread safe...
536
- def self.in_dir(dir)
537
- old_pwd = FileUtils.pwd
538
- res = nil
539
- begin
540
- FileUtils.mkdir_p dir unless File.exists? dir
541
- FileUtils.cd dir
542
- res = yield
543
- ensure
544
- FileUtils.cd old_pwd
545
- end
546
- res
547
- end
548
-
549
-
550
- def self.add_defaults(options, defaults = {})
551
- case
552
- when Hash === options
553
- new_options = options.dup
554
- when String === options
555
- new_options = string2hash options
556
- else
557
- raise "Format of '#{options.inspect}' not understood. It should be a hash"
558
- end
559
-
560
- defaults.each do |key, value|
561
- next if options.include? key
562
-
563
- new_options[key] = value
564
- end
565
-
566
- new_options
567
- end
568
-
569
- def self.process_options(hash, *keys)
570
- if keys.length == 1
571
- hash.include?(keys.first.to_sym) ? hash.delete(keys.first.to_sym) : hash.delete(keys.first.to_s)
572
- else
573
- keys.collect do |key| hash.include?(key.to_sym) ? hash.delete(key.to_sym) : hash.delete(key.to_s) end
574
- end
575
- end
576
-
577
- def self.pull_keys(hash, prefix)
578
- new = {}
579
- hash.keys.each do |key|
580
- if key.to_s =~ /#{ prefix }_(.*)/
581
- case
582
- when String === key
583
- new[$1] = hash.delete key
584
- when Symbol === key
585
- new[$1.to_sym] = hash.delete key
586
- end
587
- else
588
- if key.to_s == prefix.to_s
589
- new[key] = hash.delete key
590
- end
591
- end
592
- end
593
-
594
- new
595
- end
596
-
597
- def self.string2const(string)
598
- return nil if string.nil?
599
- mod = Kernel
600
-
601
- string.to_s.split('::').each do |str|
602
- mod = mod.const_get str
603
- end
604
-
605
- mod
606
- end
607
-
608
- def self.string2hash_old(string)
609
-
610
- options = {}
611
- string.split(/#/).each do |str|
612
- if str.match(/(.*)=(.*)/)
613
- option, value = $1, $2
614
- else
615
- option, value = str, true
616
- end
617
-
618
- option = option.sub(":",'').to_sym if option.chars.first == ':'
619
- value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
620
-
621
- if value == true
622
- options[option] = option.to_s.chars.first != '!'
623
- else
624
- options[option] = Thread.start do
625
- $SAFE = 0;
626
- case
627
- when value =~ /^(?:true|T)$/i
628
- true
629
- when value =~ /^(?:false|F)$/i
630
- false
631
- when Symbol === value
632
- value
633
- when (String === value and value =~ /^\/(.*)\/$/)
634
- Regexp.new /#{$1}/
635
- else
636
- begin
637
- Kernel.const_get value
638
- rescue
639
- begin
640
- raise if value =~ /[a-z]/ and defined? value
641
- eval(value)
642
- rescue Exception
643
- value
644
- end
645
- end
646
- end
647
- end.value
648
- end
649
- end
650
-
651
- options
652
- end
653
-
654
- def self.string2hash(string)
655
- options = {}
656
-
657
- string.split('#').each do |str|
658
- key, sep, value = str.partition "="
659
-
660
- key = key[1..-1].to_sym if key[0] == ":"
661
-
662
- options[key] = true and next if value.empty?
663
- options[key] = value[1..-1].to_sym and next if value[0] == ":"
664
- options[key] = Regexp.new(/#{value[1..-2]}/) and next if value[0] == "/" and value[-1] == "/"
665
- options[key] = value[1..-2] and next if value =~ /^['"].*['"]$/
666
- options[key] = value.to_i and next if value =~ /^\d+$/
667
- options[key] = value.to_f and next if value =~ /^\d*\.\d+$/
668
- options[key] = true and next if value == "true"
669
- options[key] = false and next if value == "false"
670
- options[key] = value and next
671
-
672
- options[key] = begin
673
- saved_safe = $SAFE
674
- $SAFE = 0
675
- eval(value)
676
- rescue Exception
677
- value
678
- ensure
679
- $SAFE = saved_safe
680
- end
681
- end
682
-
683
- return options
684
-
685
- options = {}
686
- string.split(/#/).each do |str|
687
- if str.match(/(.*)=(.*)/)
688
- option, value = $1, $2
689
- else
690
- option, value = str, true
691
- end
692
-
693
- option = option.sub(":",'').to_sym if option.chars.first == ':'
694
- value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
695
-
696
- if value == true
697
- options[option] = option.to_s.chars.first != '!'
698
- else
699
- options[option] = Thread.start do
700
- $SAFE = 0;
701
- case
702
- when value =~ /^(?:true|T)$/i
703
- true
704
- when value =~ /^(?:false|F)$/i
705
- false
706
- when Symbol === value
707
- value
708
- when (String === value and value =~ /^\/(.*)\/$/)
709
- Regexp.new /#{$1}/
710
- else
711
- begin
712
- Kernel.const_get value
713
- rescue
714
- begin
715
- raise if value =~ /[a-z]/ and defined? value
716
- eval(value)
717
- rescue Exception
718
- value
719
- end
720
- end
721
- end
722
- end.value
723
- end
724
- end
725
-
726
- options
727
- end
728
-
729
- def self.field_position(fields, field, quiet = false)
730
- return field if Integer === field or Range === field
731
- raise FieldNotFoundError, "Field information missing" if fields.nil? && ! quiet
732
- fields.each_with_index{|f,i| return i if f == field}
733
- field_re = Regexp.new /^#{field}$/i
734
- fields.each_with_index{|f,i| return i if f =~ field_re}
735
- raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
736
- end
737
-
738
- # Divides the array into +num+ chunks of the same size by placing one
739
- # element in each chunk iteratively.
740
- def self.divide(array, num)
741
- num = 1 if num == 0
742
- chunks = []
743
- num.to_i.times do chunks << [] end
744
- array.each_with_index{|e, i|
745
- c = i % num
746
- chunks[c] << e
747
- }
748
- chunks
749
- end
750
-
751
- # Divides the array into chunks of +num+ same size by placing one
752
- # element in each chunk iteratively.
753
- def self.ordered_divide(array, num)
754
- last = array.length - 1
755
- chunks = []
756
- current = 0
757
- while current <= last
758
- next_current = [last, current + num - 1].min
759
- chunks << array[current..next_current]
760
- current = next_current + 1
761
- end
762
- chunks
763
- end
764
-
765
- def self.append_zipped(current, new)
766
- current.each do |v|
767
- n = new.shift
768
- if Array === n
769
- v.concat new
770
- else
771
- v << n
772
- end
773
- end
774
- current
775
- end
776
-
777
- def self.zip_fields(array)
778
- return [] if array.empty? or (first = array.first).nil?
779
- first.zip(*array[1..-1])
780
- end
781
24
  end
782
25
 
783
26
  module PDF2Text