rbbt-util 5.14.16 → 5.14.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rbbt +0 -1
- data/etc/app.d/base.rb +3 -1
- data/lib/rbbt/annotations/util.rb +4 -1
- data/lib/rbbt/association/index.rb +1 -1
- data/lib/rbbt/resource/path.rb +9 -1
- data/lib/rbbt/tsv/stream.rb +1 -3
- data/lib/rbbt/util/misc/bgzf.rb +2 -2
- data/lib/rbbt/util/misc/development.rb +12 -4
- data/lib/rbbt/util/semaphore.rb +8 -12
- data/lib/rbbt/workflow/accessor.rb +1 -1
- data/share/rbbt_commands/tsv/assemble_pdf_table +68 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c5725c2ab4aa762ca676d5d4e18f17dee2e8e92
|
4
|
+
data.tar.gz: 088c84611fccd379434220853ce601256540b9cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4aa7267702a7587187a85c30fb4bf646efb298ff8d92d2f9aa23daeb58e19ce89dddd470f9d36fcb4258f5bda0e37a6d6bb98327c5308f33b12e79dcaa935e72
|
7
|
+
data.tar.gz: 60796a47e6344334dc576c850df51d405796d8e98f881fcf487f53a23eeaa6db54e0f79207682c28f59b8b694eab063dfbe1395ec114167e3cca9c70b5984116
|
data/bin/rbbt
CHANGED
data/etc/app.d/base.rb
CHANGED
@@ -23,7 +23,9 @@ use Rack::Session::Cookie, :key => 'rack.session',
|
|
23
23
|
|
24
24
|
|
25
25
|
#{{{ DIRECTORIES
|
26
|
-
|
26
|
+
global_var = Rbbt.var
|
27
|
+
local_var = Rbbt.var.app[$class_name]
|
28
|
+
|
27
29
|
set :cache_dir , local_var.sinatra.cache.find
|
28
30
|
set :persist_dir , local_var.sinatra.cache.persistence.find
|
29
31
|
set :persist_options , {:persist => true, :persist_dir => :persist_dir}
|
data/lib/rbbt/annotations/util.rb
CHANGED
@@ -50,10 +50,13 @@ module Annotated
|
|
50
50
|
info = {}
|
51
51
|
fields.each_with_index do |field,i|
|
52
52
|
next if field == "literal"
|
53
|
-
|
53
|
+
case field
|
54
|
+
when "JSON"
|
54
55
|
JSON.parse(values[i]).each do |key, value|
|
55
56
|
info[key.to_sym] = value
|
56
57
|
end
|
58
|
+
when nil
|
59
|
+
next
|
57
60
|
else
|
58
61
|
info[field.to_sym] = resolve_array(values[i])
|
59
62
|
end
|
data/lib/rbbt/association/index.rb
CHANGED
@@ -74,7 +74,7 @@ module Association
|
|
74
74
|
end
|
75
75
|
|
76
76
|
def subset(source, target)
|
77
|
-
return [] if source.nil? or target.nil?
|
77
|
+
return [] if source.nil? or target.nil? or source.empty? or target.empty?
|
78
78
|
|
79
79
|
if source == :all or source == "all"
|
80
80
|
if target == :all or target == "all"
|
data/lib/rbbt/resource/path.rb
CHANGED
@@ -40,7 +40,7 @@ module Path
|
|
40
40
|
|
41
41
|
def glob(pattern = '*')
|
42
42
|
if self.include? "*"
|
43
|
-
|
43
|
+
self.glob_all
|
44
44
|
else
|
45
45
|
return [] unless self.exists?
|
46
46
|
exp = File.join(self.find, pattern)
|
@@ -156,6 +156,14 @@ module Path
|
|
156
156
|
compact.select{|file| file.exists? }.uniq
|
157
157
|
end
|
158
158
|
|
159
|
+
def glob_all(caller_lib = nil, search_paths = nil)
|
160
|
+
search_paths ||= self.search_paths || SEARCH_PATHS
|
161
|
+
search_paths = search_paths.dup
|
162
|
+
|
163
|
+
search_paths.keys.
|
164
|
+
collect{|where| Dir.glob(find(where, Path.caller_lib_dir, search_paths))}.
|
165
|
+
compact.flatten.collect{|path| Path.setup(path, self.resource, self.pkgdir)}
|
166
|
+
end
|
159
167
|
#{{{ Methods
|
160
168
|
|
161
169
|
def in_dir?(dir)
|
data/lib/rbbt/tsv/stream.rb
CHANGED
@@ -20,8 +20,6 @@ module TSV
|
|
20
20
|
options = Misc.add_defaults options, :sep => "\t", :sort => true
|
21
21
|
sort, sep, preamble = Misc.process_options options, :sort, :sep, :preamble
|
22
22
|
|
23
|
-
|
24
|
-
|
25
23
|
out = Misc.open_pipe do |sin|
|
26
24
|
|
27
25
|
streams = streams.collect do |stream|
|
@@ -72,6 +70,7 @@ module TSV
|
|
72
70
|
key_field = key_fields.compact.first
|
73
71
|
fields = fields.compact.flatten
|
74
72
|
options = options.merge(input_options.first)
|
73
|
+
options[:type] = :list if options[:type] == :single
|
75
74
|
|
76
75
|
preamble_txt = case preamble
|
77
76
|
when TrueClass
|
@@ -115,7 +114,6 @@ module TSV
|
|
115
114
|
break if min.nil?
|
116
115
|
str = []
|
117
116
|
keys.each_with_index do |key,i|
|
118
|
-
|
119
117
|
case key
|
120
118
|
when min
|
121
119
|
str << parts[i] * sep
|
data/lib/rbbt/util/misc/bgzf.rb
CHANGED
data/lib/rbbt/util/misc/development.rb
CHANGED
@@ -275,7 +275,7 @@ module Misc
|
|
275
275
|
num = :current if num.nil?
|
276
276
|
cpus = case num
|
277
277
|
when :current
|
278
|
-
|
278
|
+
10
|
279
279
|
when String
|
280
280
|
num.to_i
|
281
281
|
when Integer
|
@@ -285,10 +285,18 @@ module Misc
|
|
285
285
|
32000 / num
|
286
286
|
end
|
287
287
|
end
|
288
|
-
|
288
|
+
|
289
|
+
#file = caller.first + rand(1000000).to_s if file.nil?
|
289
290
|
index = (0..elems.length-1).to_a.collect{|v| v.to_s }
|
290
|
-
|
291
|
-
|
291
|
+
TSV.traverse index, :cpus => cpus, :bar => "Bootstrap in #{ cpus } cpus: #{ Misc.fingerprint elems }", :into => Set.new do |pos|
|
292
|
+
elem = elems[pos.to_i]
|
293
|
+
elems.annotate elem if elems.respond_to? :annotate
|
294
|
+
begin
|
295
|
+
yield elem
|
296
|
+
rescue Interrupt
|
297
|
+
Log.warn "Process #{Process.pid} was aborted"
|
298
|
+
end
|
299
|
+
nil
|
292
300
|
end
|
293
301
|
end
|
294
302
|
end
|
data/lib/rbbt/util/semaphore.rb
CHANGED
@@ -78,21 +78,17 @@ void post_semaphore(char* name){
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def self.fork_each_on_semaphore(elems, size, file = nil)
|
81
|
-
with_semaphore(size, file) do |file|
|
82
81
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
rescue Interrupt
|
90
|
-
Log.warn "Process #{Process.pid} was aborted"
|
91
|
-
end
|
82
|
+
TSV.traverse elems, :cpus => size, :bar => "Fork each on semaphore: #{ Misc.fingerprint elems }", :into => Set.new do |elem|
|
83
|
+
elems.annotate elem if elems.respond_to? :annotate
|
84
|
+
begin
|
85
|
+
yield elem
|
86
|
+
rescue Interrupt
|
87
|
+
Log.warn "Process #{Process.pid} was aborted"
|
92
88
|
end
|
93
|
-
|
89
|
+
nil
|
94
90
|
end
|
95
|
-
|
91
|
+
nil
|
96
92
|
end
|
97
93
|
|
98
94
|
def self.thread_each_on_semaphore(elems, size)
|
data/lib/rbbt/workflow/accessor.rb
CHANGED
@@ -500,7 +500,7 @@ module Workflow
|
|
500
500
|
options.each{|i,v|
|
501
501
|
case v
|
502
502
|
when Symbol
|
503
|
-
rec_dependency = real_dependencies.collect{|d| [d, d.
|
503
|
+
rec_dependency = real_dependencies.collect{|d| [d, d.rec_dependencies].compact.flatten}.flatten.select{|d| d.task.name == v }.first
|
504
504
|
if (dependency.first.tasks[dependency[1]].input_options[i] || {})[:stream]
|
505
505
|
inputs[i] = rec_dependency.run(true).grace.join.path
|
506
506
|
else
|
data/share/rbbt_commands/tsv/assemble_pdf_table
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rbbt-util'
|
4
|
+
require 'rbbt/util/simpleopt'
|
5
|
+
|
6
|
+
$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
|
7
|
+
|
8
|
+
options = SOPT.setup <<EOF
|
9
|
+
Assemble the TSV from a table extracted from a PDF
|
10
|
+
|
11
|
+
$ rbbt tsv assemble_pdf_table file.txt
|
12
|
+
|
13
|
+
When extracting tables from PDF they are often laid out one column at a time, divided by pages.
|
14
|
+
This command takes a file with the following structure:
|
15
|
+
|
16
|
+
1 A few lines containing table headers, one per line
|
17
|
+
2 A group of lines containing the values for the first column of the first page, ending in an empty line
|
18
|
+
3 More groups of lines corresponding to other columns
|
19
|
+
4 Repetitions of 2 and 3 for more pages
|
20
|
+
|
21
|
+
This script will take care of matching the columns read with the headers specified
|
22
|
+
|
23
|
+
-h--help Help
|
24
|
+
EOF
|
25
|
+
|
26
|
+
SOPT.usage if options[:help]
|
27
|
+
|
28
|
+
file = ARGV.shift
|
29
|
+
|
30
|
+
file = STDIN if file == '-' or file.nil?
|
31
|
+
|
32
|
+
|
33
|
+
txt = Misc.fixutf8(TSV.get_stream(file).read)
|
34
|
+
|
35
|
+
header, _sep, rest = txt.strip.partition("\n\n")
|
36
|
+
fields = header.split("\n")
|
37
|
+
num_columns = fields.length
|
38
|
+
|
39
|
+
columns = {}
|
40
|
+
|
41
|
+
num_columns.times do |i|
|
42
|
+
columns[i] = []
|
43
|
+
end
|
44
|
+
|
45
|
+
lines = rest.split("\n")
|
46
|
+
|
47
|
+
while lines and lines.any?
|
48
|
+
first_block = lines[0..lines.index("")-1]
|
49
|
+
block_size = first_block.length
|
50
|
+
lines = lines[block_size+1..-1]
|
51
|
+
columns[0] << first_block
|
52
|
+
(1..num_columns-1).each do |pos|
|
53
|
+
block = lines[0..block_size-1]
|
54
|
+
lines = lines[block_size+1..-1]
|
55
|
+
columns[pos] << block
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
full_columns = []
|
60
|
+
num_columns.times do |i|
|
61
|
+
column = columns[i]
|
62
|
+
full_columns << column.flatten
|
63
|
+
end
|
64
|
+
|
65
|
+
puts "#" << fields * "\t"
|
66
|
+
Misc.zip_fields(full_columns).zip do |values|
|
67
|
+
puts values * "\t"
|
68
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.14.16
|
4
|
+
version: 5.14.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-08-
|
11
|
+
date: 2014-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -257,6 +257,7 @@ files:
|
|
257
257
|
- share/rbbt_commands/system/purge
|
258
258
|
- share/rbbt_commands/system/report
|
259
259
|
- share/rbbt_commands/system/status
|
260
|
+
- share/rbbt_commands/tsv/assemble_pdf_table
|
260
261
|
- share/rbbt_commands/tsv/attach
|
261
262
|
- share/rbbt_commands/tsv/change_id
|
262
263
|
- share/rbbt_commands/tsv/get
|