rbbt-util 5.14.16 → 5.14.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/rbbt +0 -1
- data/etc/app.d/base.rb +3 -1
- data/lib/rbbt/annotations/util.rb +4 -1
- data/lib/rbbt/association/index.rb +1 -1
- data/lib/rbbt/resource/path.rb +9 -1
- data/lib/rbbt/tsv/stream.rb +1 -3
- data/lib/rbbt/util/misc/bgzf.rb +2 -2
- data/lib/rbbt/util/misc/development.rb +12 -4
- data/lib/rbbt/util/semaphore.rb +8 -12
- data/lib/rbbt/workflow/accessor.rb +1 -1
- data/share/rbbt_commands/tsv/assemble_pdf_table +68 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c5725c2ab4aa762ca676d5d4e18f17dee2e8e92
|
4
|
+
data.tar.gz: 088c84611fccd379434220853ce601256540b9cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4aa7267702a7587187a85c30fb4bf646efb298ff8d92d2f9aa23daeb58e19ce89dddd470f9d36fcb4258f5bda0e37a6d6bb98327c5308f33b12e79dcaa935e72
|
7
|
+
data.tar.gz: 60796a47e6344334dc576c850df51d405796d8e98f881fcf487f53a23eeaa6db54e0f79207682c28f59b8b694eab063dfbe1395ec114167e3cca9c70b5984116
|
data/bin/rbbt
CHANGED
data/etc/app.d/base.rb
CHANGED
@@ -23,7 +23,9 @@ use Rack::Session::Cookie, :key => 'rack.session',
|
|
23
23
|
|
24
24
|
|
25
25
|
#{{{ DIRECTORIES
|
26
|
-
|
26
|
+
global_var = Rbbt.var
|
27
|
+
local_var = Rbbt.var.app[$class_name]
|
28
|
+
|
27
29
|
set :cache_dir , local_var.sinatra.cache.find
|
28
30
|
set :persist_dir , local_var.sinatra.cache.persistence.find
|
29
31
|
set :persist_options , {:persist => true, :persist_dir => :persist_dir}
|
@@ -50,10 +50,13 @@ module Annotated
|
|
50
50
|
info = {}
|
51
51
|
fields.each_with_index do |field,i|
|
52
52
|
next if field == "literal"
|
53
|
-
|
53
|
+
case field
|
54
|
+
when "JSON"
|
54
55
|
JSON.parse(values[i]).each do |key, value|
|
55
56
|
info[key.to_sym] = value
|
56
57
|
end
|
58
|
+
when nil
|
59
|
+
next
|
57
60
|
else
|
58
61
|
info[field.to_sym] = resolve_array(values[i])
|
59
62
|
end
|
@@ -74,7 +74,7 @@ module Association
|
|
74
74
|
end
|
75
75
|
|
76
76
|
def subset(source, target)
|
77
|
-
return [] if source.nil? or target.nil?
|
77
|
+
return [] if source.nil? or target.nil? or source.empty? or target.empty?
|
78
78
|
|
79
79
|
if source == :all or source == "all"
|
80
80
|
if target == :all or target == "all"
|
data/lib/rbbt/resource/path.rb
CHANGED
@@ -40,7 +40,7 @@ module Path
|
|
40
40
|
|
41
41
|
def glob(pattern = '*')
|
42
42
|
if self.include? "*"
|
43
|
-
|
43
|
+
self.glob_all
|
44
44
|
else
|
45
45
|
return [] unless self.exists?
|
46
46
|
exp = File.join(self.find, pattern)
|
@@ -156,6 +156,14 @@ module Path
|
|
156
156
|
compact.select{|file| file.exists? }.uniq
|
157
157
|
end
|
158
158
|
|
159
|
+
def glob_all(caller_lib = nil, search_paths = nil)
|
160
|
+
search_paths ||= self.search_paths || SEARCH_PATHS
|
161
|
+
search_paths = search_paths.dup
|
162
|
+
|
163
|
+
search_paths.keys.
|
164
|
+
collect{|where| Dir.glob(find(where, Path.caller_lib_dir, search_paths))}.
|
165
|
+
compact.flatten.collect{|path| Path.setup(path, self.resource, self.pkgdir)}
|
166
|
+
end
|
159
167
|
#{{{ Methods
|
160
168
|
|
161
169
|
def in_dir?(dir)
|
data/lib/rbbt/tsv/stream.rb
CHANGED
@@ -20,8 +20,6 @@ module TSV
|
|
20
20
|
options = Misc.add_defaults options, :sep => "\t", :sort => true
|
21
21
|
sort, sep, preamble = Misc.process_options options, :sort, :sep, :preamble
|
22
22
|
|
23
|
-
|
24
|
-
|
25
23
|
out = Misc.open_pipe do |sin|
|
26
24
|
|
27
25
|
streams = streams.collect do |stream|
|
@@ -72,6 +70,7 @@ module TSV
|
|
72
70
|
key_field = key_fields.compact.first
|
73
71
|
fields = fields.compact.flatten
|
74
72
|
options = options.merge(input_options.first)
|
73
|
+
options[:type] = :list if options[:type] == :single
|
75
74
|
|
76
75
|
preamble_txt = case preamble
|
77
76
|
when TrueClass
|
@@ -115,7 +114,6 @@ module TSV
|
|
115
114
|
break if min.nil?
|
116
115
|
str = []
|
117
116
|
keys.each_with_index do |key,i|
|
118
|
-
|
119
117
|
case key
|
120
118
|
when min
|
121
119
|
str << parts[i] * sep
|
data/lib/rbbt/util/misc/bgzf.rb
CHANGED
@@ -275,7 +275,7 @@ module Misc
|
|
275
275
|
num = :current if num.nil?
|
276
276
|
cpus = case num
|
277
277
|
when :current
|
278
|
-
|
278
|
+
10
|
279
279
|
when String
|
280
280
|
num.to_i
|
281
281
|
when Integer
|
@@ -285,10 +285,18 @@ module Misc
|
|
285
285
|
32000 / num
|
286
286
|
end
|
287
287
|
end
|
288
|
-
|
288
|
+
|
289
|
+
#file = caller.first + rand(1000000).to_s if file.nil?
|
289
290
|
index = (0..elems.length-1).to_a.collect{|v| v.to_s }
|
290
|
-
|
291
|
-
|
291
|
+
TSV.traverse index, :cpus => cpus, :bar => "Bootstrap in #{ cpus } cpus: #{ Misc.fingerprint elems }", :into => Set.new do |pos|
|
292
|
+
elem = elems[pos.to_i]
|
293
|
+
elems.annotate elem if elems.respond_to? :annotate
|
294
|
+
begin
|
295
|
+
yield elem
|
296
|
+
rescue Interrupt
|
297
|
+
Log.warn "Process #{Process.pid} was aborted"
|
298
|
+
end
|
299
|
+
nil
|
292
300
|
end
|
293
301
|
end
|
294
302
|
end
|
data/lib/rbbt/util/semaphore.rb
CHANGED
@@ -78,21 +78,17 @@ void post_semaphore(char* name){
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def self.fork_each_on_semaphore(elems, size, file = nil)
|
81
|
-
with_semaphore(size, file) do |file|
|
82
81
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
rescue Interrupt
|
90
|
-
Log.warn "Process #{Process.pid} was aborted"
|
91
|
-
end
|
82
|
+
TSV.traverse elems, :cpus => size, :bar => "Fork each on semaphore: #{ Misc.fingerprint elems }", :into => Set.new do |elem|
|
83
|
+
elems.annotate elem if elems.respond_to? :annotate
|
84
|
+
begin
|
85
|
+
yield elem
|
86
|
+
rescue Interrupt
|
87
|
+
Log.warn "Process #{Process.pid} was aborted"
|
92
88
|
end
|
93
|
-
|
89
|
+
nil
|
94
90
|
end
|
95
|
-
|
91
|
+
nil
|
96
92
|
end
|
97
93
|
|
98
94
|
def self.thread_each_on_semaphore(elems, size)
|
@@ -500,7 +500,7 @@ module Workflow
|
|
500
500
|
options.each{|i,v|
|
501
501
|
case v
|
502
502
|
when Symbol
|
503
|
-
rec_dependency = real_dependencies.collect{|d| [d, d.
|
503
|
+
rec_dependency = real_dependencies.collect{|d| [d, d.rec_dependencies].compact.flatten}.flatten.select{|d| d.task.name == v }.first
|
504
504
|
if (dependency.first.tasks[dependency[1]].input_options[i] || {})[:stream]
|
505
505
|
inputs[i] = rec_dependency.run(true).grace.join.path
|
506
506
|
else
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rbbt-util'
|
4
|
+
require 'rbbt/util/simpleopt'
|
5
|
+
|
6
|
+
$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
|
7
|
+
|
8
|
+
options = SOPT.setup <<EOF
|
9
|
+
Assemble the TSV from a table extracted from a PDF
|
10
|
+
|
11
|
+
$ rbbt tsv assemble_pdf_table file.txt
|
12
|
+
|
13
|
+
When extracting tables from PDF they are often laid out one column at a time, divided by pages.
|
14
|
+
This command takes a file with the following structure:
|
15
|
+
|
16
|
+
1 A few lines containing table headers, one per line
|
17
|
+
2 A group of lines containing the values for the first column of the first page, ending in an empty line
|
18
|
+
3 More groups of lines corresponding to other columns
|
19
|
+
4 Repetitions of 2 and 3 for more pages
|
20
|
+
|
21
|
+
This script will take care of matching the columns read with the headers specified
|
22
|
+
|
23
|
+
-h--help Help
|
24
|
+
EOF
|
25
|
+
|
26
|
+
SOPT.usage if options[:help]
|
27
|
+
|
28
|
+
file = ARGV.shift
|
29
|
+
|
30
|
+
file = STDIN if file == '-' or file.nil?
|
31
|
+
|
32
|
+
|
33
|
+
txt = Misc.fixutf8(TSV.get_stream(file).read)
|
34
|
+
|
35
|
+
header, _sep, rest = txt.strip.partition("\n\n")
|
36
|
+
fields = header.split("\n")
|
37
|
+
num_columns = fields.length
|
38
|
+
|
39
|
+
columns = {}
|
40
|
+
|
41
|
+
num_columns.times do |i|
|
42
|
+
columns[i] = []
|
43
|
+
end
|
44
|
+
|
45
|
+
lines = rest.split("\n")
|
46
|
+
|
47
|
+
while lines and lines.any?
|
48
|
+
first_block = lines[0..lines.index("")-1]
|
49
|
+
block_size = first_block.length
|
50
|
+
lines = lines[block_size+1..-1]
|
51
|
+
columns[0] << first_block
|
52
|
+
(1..num_columns-1).each do |pos|
|
53
|
+
block = lines[0..block_size-1]
|
54
|
+
lines = lines[block_size+1..-1]
|
55
|
+
columns[pos] << block
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
full_columns = []
|
60
|
+
num_columns.times do |i|
|
61
|
+
column = columns[i]
|
62
|
+
full_columns << column.flatten
|
63
|
+
end
|
64
|
+
|
65
|
+
puts "#" << fields * "\t"
|
66
|
+
Misc.zip_fields(full_columns).zip do |values|
|
67
|
+
puts values * "\t"
|
68
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.14.16
|
4
|
+
version: 5.14.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-08-
|
11
|
+
date: 2014-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -257,6 +257,7 @@ files:
|
|
257
257
|
- share/rbbt_commands/system/purge
|
258
258
|
- share/rbbt_commands/system/report
|
259
259
|
- share/rbbt_commands/system/status
|
260
|
+
- share/rbbt_commands/tsv/assemble_pdf_table
|
260
261
|
- share/rbbt_commands/tsv/attach
|
261
262
|
- share/rbbt_commands/tsv/change_id
|
262
263
|
- share/rbbt_commands/tsv/get
|