rbbt-util 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,6 @@
1
- require 'rbbt/util/tsv'
2
- require 'rbbt/util/open'
3
1
  require 'spreadsheet'
4
-
2
+ require 'rbbt/util/tsv'
3
+ require 'rbbt/util/tmpfile'
5
4
  class TSV
6
5
  def self.excel2tsv(file, options = {})
7
6
  sheet = options.delete :sheet
data/lib/rbbt/util/log.rb CHANGED
@@ -48,3 +48,8 @@ module Log
48
48
  @@severity = ENV['RBBT_LOG'].to_i
49
49
  end
50
50
  end
51
+
52
# Development helper: writes the call site and an inspected dump of +message+
# to the debug log through Log.debug.
#
# message - any object; it is logged via #inspect.
# file    - accepted for backward compatibility only; it is not used, the
#           output always goes through Log.debug.
def ddd(message, file = $stdout)
  # Interpolation is used instead of `literal << value` so string literals are
  # never mutated (safe under frozen string literals) and a missing caller
  # entry cannot raise a TypeError.
  Log.debug "DEVEL => #{caller.first}"
  Log.debug "DEVEL: #{message.inspect}"
end
@@ -80,6 +80,14 @@ module Misc
80
80
  new_options
81
81
  end
82
82
 
83
# Removes the given option keys from +hash+ and returns their values.
#
# Each key is looked up first as a Symbol and then as a String, so options
# coming from symbol- or string-keyed hashes are treated alike (previously the
# single-key form only recognised Symbol keys). Key presence is tested
# explicitly, so an option explicitly set to false is returned as false
# instead of being mistaken for a missing key.
#
# hash - the options Hash; it is modified in place.
# keys - one or more option names (Symbol or String).
#
# Returns the bare value when exactly one key is given, otherwise an Array of
# values in the order of +keys+ (nil for absent keys).
def self.process_options(hash, *keys)
  values = keys.map do |key|
    symbol = key.to_sym
    if hash.key?(symbol)
      hash.delete(symbol)
    else
      hash.delete(key.to_s)
    end
  end

  keys.length == 1 ? values.first : values
end
90
+
83
91
  def self.string2hash(string)
84
92
 
85
93
  options = {}
@@ -91,7 +99,7 @@ module Misc
91
99
  end
92
100
 
93
101
  option = option.sub(":",'').to_sym if option.chars.first == ':'
94
-
102
+ value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
95
103
 
96
104
  if value == true
97
105
  options[option] = option.to_s.chars.first != '!'
@@ -129,10 +137,18 @@ module Misc
129
137
  fields.each_with_index{|f,i| return i if f =~ field_re}
130
138
  raise FieldNotFoundError, "Field '#{ field }' was not found" unless quiet
131
139
  end
140
+
141
# Nil-safe wrapper around +first+: returns nil when +list+ is nil, otherwise
# whatever list.first returns.
+ def self.first(list)
142
+ return nil if list.nil?
143
+ return list.first
144
+ end
145
+
132
146
  end
133
147
 
134
148
  module PDF2Text
135
149
  def self.pdf2text(filename)
150
+ require 'rbbt/util/cmd'
151
+ require 'rbbt/util/tmpfile'
136
152
  TmpFile.with_file(Open.read(filename)) do |pdf|
137
153
  CMD.cmd("pdftotext #{pdf} -", :pipe => false, :stderr => true)
138
154
  end
@@ -169,8 +185,20 @@ class NamedArray < Array
169
185
  keys = keys.collect{|k| Misc.field_position(fields, k) }
170
186
  original_values_at(*keys)
171
187
  end
188
+
189
# Transposes the array: zips the first element with all the remaining ones,
# so that the i-th result groups the i-th value of every element.
#
# When +fields+ is set, each resulting tuple is wrapped with
# NamedArray.name(tuple, fields).
#
# Returns an Array of tuples; an empty receiver yields an empty Array
# (previously it raised NoMethodError because self[0] was nil).
def zip_fields
  return [] if empty?

  zipped = self[0].zip(*self[1..-1])
  zipped = zipped.collect { |tuple| NamedArray.name(tuple, fields) } if fields
  zipped
end
172
194
  end
173
195
 
196
# Runs the given block under Benchmark.measure and prints the resulting
# measurement with puts. The 'benchmark' library is required lazily, on call.
+ def benchmark
197
+ require 'benchmark'
198
+ puts(Benchmark.measure do
199
+ yield
200
+ end)
201
+ end
174
202
 
175
203
  def profile
176
204
  require 'ruby-prof'
@@ -146,6 +146,7 @@ module Open
146
146
  !! (file =~ /\.zip/)
147
147
  end
148
148
 
149
+
149
150
  # Open Read Write
150
151
 
151
152
  def self.open(url, options = {})
@@ -0,0 +1,109 @@
1
+ require 'rbbt/util/misc'
2
+ require 'rbbt/util/open'
3
+ require 'yaml'
4
+
5
# Disk-backed cache for the result of a block.
#
# Persistence.persist runs the block once, stores its result under CACHEDIR in
# the requested format (:tsv, :string, :marshal or :yaml) and on later calls
# loads the stored copy instead of running the block again.
module Persistence
  # Digest and FileUtils are used below; require them here so the module does
  # not depend on some other file having loaded them first.
  require 'digest/md5'
  require 'fileutils'
  require 'rbbt/util/tc_hash'
  TSV = TCHash

  # Attributes copied between a TSV-like object and its persisted copy.
  TSV_ATTRIBUTES = %w(case_insensitive fields key_field type filename)

  CACHEDIR="/tmp/tsv_persistent_cache"
  FileUtils.mkdir CACHEDIR unless File.exist? CACHEDIR

  # Changes the cache directory (in place, so CACHEDIR keeps pointing to the
  # current value) and creates it if needed.
  def self.cachedir=(cachedir)
    CACHEDIR.replace cachedir
    FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
  end

  # Returns the current cache directory.
  def self.cachedir
    CACHEDIR
  end

  # Builds the cache file path for +file+: the prefix (whitespace replaced by
  # '_' and '/' by '>') followed by the MD5 of the inspected [file, options].
  def self.get_persistence_file(file, prefix, options = {})
    File.join(CACHEDIR, prefix.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
  end

  # Runs the block and persists its result, or loads a previously persisted
  # result.
  #
  # file             - the source; a path String, a File, a Persistence::TSV
  #                    or any other object (identified by the MD5 of #inspect).
  # prefix           - String prepended to the cache file name.
  # persistence_type - :tsv, :string, :marshal or :yaml.
  # options          - Hash; :persistence (default true), :persistence_file
  #                    and :filename are consumed here, the rest is passed to
  #                    the block and takes part in the cache file name.
  #
  # The block receives (file, options, filename, persistence_file) and returns
  # either the data or a [data, extra_hash] pair.
  #
  # Returns [data, extra] when persistence is enabled; when it is disabled the
  # block is called with (file, options) and its value is returned untouched.
  def self.persist(file, prefix = "", persistence_type = :string, options = {})
    options = Misc.add_defaults options, :persistence => true

    persistence, persistence_file =
      Misc.process_options options, :persistence, :persistence_file

    filename = Misc.process_options options, :filename
    filename ||= case
                 when (String === file and File.exist? file)
                   File.expand_path file
                 when File === file
                   File.expand_path file.path
                 when TSV === file
                   file.filename
                 else
                   Digest::MD5.hexdigest(file.inspect)
                 end

    # Persistence disabled: just run the block.
    return yield(file, options) unless persistence

    persistence_file ||= get_persistence_file(filename, prefix, options)

    #{{{ CREATE
    unless File.exist? persistence_file
      Log.low "Creating Persistence #{ persistence_file } for #{ filename }"
      res = yield file, options, filename, persistence_file
      if Array === res and res.length == 2 and (Hash === res[1] or res[1].nil?)
        data, extra = res
      else
        data, extra = [res, nil]
      end

      case persistence_type.to_sym
      when :tsv
        if Hash === data or Object::TSV === data
          Log.debug "Creating #{Persistence::TSV} for #{ persistence_file }"
          per = Persistence::TSV.get persistence_file
          per.write
          data.each{|k,v| per[k.to_s] = v}
          TSV_ATTRIBUTES.each do |key|
            # Take the attribute from the data itself when it has it,
            # otherwise from the extra hash (which may be absent).
            value = if data.respond_to? key
                      data.send(key.to_sym)
                    else
                      (extra || {})[key.to_sym]
                    end
            per.send "#{key}=".to_sym, value
          end
          per.read

          data = per
        end
      when :string
        Open.write(persistence_file, data.to_s)
      when :marshal
        Open.write(persistence_file, Marshal.dump(data))
      when :yaml
        Open.write(persistence_file, YAML.dump(data))
      end

      return [data, extra]
    end

    #{{{ LOAD
    Log.low "Opening Persistence #{ persistence_file } for #{ filename }"
    case persistence_type.to_sym
    when :tsv
      data = Persistence::TSV.get persistence_file

      extra = {}
      TSV_ATTRIBUTES.each{|key| extra[key.to_sym] = data.send key.to_sym}

      return [data, extra]
    when :string
      # Read the cache file itself (this used to read +persistence_type+).
      return [Open.read(persistence_file), nil]
    when :marshal
      # NOTE(review): Marshal.load is only safe on files this process wrote;
      # the default cache directory lives under /tmp.
      return [File.open(persistence_file){|f| Marshal.load(f)}, nil]
    when :yaml
      # NOTE(review): YAML.load can instantiate arbitrary objects on older
      # Ruby versions; same trust assumption as the marshal branch.
      return [File.open(persistence_file){|f| YAML.load(f)}, nil]
    end
  end
end
@@ -1,10 +1,60 @@
1
1
  require 'rbbt/util/open'
2
2
  require 'rbbt/util/tsv'
3
3
  require 'rbbt/util/log'
4
- require 'rake'
4
+ require 'rbbt/util/rake'
5
5
 
6
6
  module PKGData
7
- FILES = {} unless defined? FILES
7
+ attr_accessor :claims
8
+ def self.extended(base)
9
+ base.claims = {}
10
+ end
11
+
12
+ module Path
13
+ attr_accessor :base
14
+
15
+ def method_missing(name, *args, &block)
16
+ new = File.join(self.dup, name.to_s)
17
+ new.extend Path
18
+ new.base = base
19
+ new
20
+ end
21
+
22
+ def [](name)
23
+ new = File.join(self.dup, name.to_s)
24
+ new.extend Path
25
+ new.base = base
26
+ new
27
+ end
28
+
29
+ def tsv(options = {})
30
+ produce
31
+ TSV.new self, options
32
+ end
33
+
34
+ def index(field = nil, other = nil, options = {})
35
+ produce
36
+ TSV.index self, options.merge(:target => field, :others => other)
37
+ end
38
+
39
+ def open
40
+ produce
41
+ Open.open(self)
42
+ end
43
+
44
+ def read
45
+ produce
46
+ Open.read(self)
47
+ end
48
+
49
+ def produce
50
+ return if File.exists? self
51
+
52
+ Log.debug("Trying to produce '#{ self }'")
53
+ file, producer = base.reclaim self
54
+ base.produce(self, producer[:get], producer[:subdir], producer[:sharedir])
55
+ end
56
+ end
57
+
8
58
  class SharedirNotFoundError < StandardError; end
9
59
 
10
60
  def self.sharedir_for_file(file = __FILE__)
@@ -32,79 +82,81 @@ module PKGData
32
82
  raise SharedirNotFoundError
33
83
  end
34
84
 
35
- def run_rake(path, dir, task = nil)
36
- rakefile = File.join(dir, 'Rakefile')
37
- return nil unless File.exists? rakefile
38
- if task.nil?
39
- task ||= :default
40
- else
41
- task.sub!(/\/$/,'') if String === task
42
- path = File.dirname(path)
43
- end
85
# Returns a copy of datadir extended with Path, with this object set as the
# path's base.
+ def files
86
+ path = datadir.dup.extend Path
87
+ path.base = self
88
+ path
89
+ end
44
90
 
45
- load rakefile
46
- old_dir = FileUtils.pwd
47
- begin
48
- FileUtils.mkdir_p path
49
- FileUtils.chdir path
50
- Rake::Task[task].invoke
51
- Rake::Task[task].reenable
52
- ensure
53
- FileUtils.chdir old_dir
91
# Tells whether +file+ is the data directory itself or lives inside it.
#
# Both paths are expanded before comparing. The match is done on whole path
# components: a sibling such as "<datadir>2/x" is not considered to be inside
# the data directory (the previous prefix regexp accepted it).
#
# file - a path (anything responding to to_s).
#
# Returns true or false.
def in_datadir?(file)
  root = File.expand_path(datadir)
  path = File.expand_path(file.to_s)
  return true if path == root

  # Compare against "<root>/" so only real descendants match.
  prefix = root.end_with?(File::SEPARATOR) ? root : root + File::SEPARATOR
  path.start_with?(prefix)
end
57
98
 
58
- def get_datafile(file, path, get, sharedir)
59
- Log.log "Getting data file '#{ file }' into '#{ path }'. Get: #{get.to_s}"
60
-
61
- FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
62
-
63
- case
64
- when get.nil?
65
- load File.join(sharedir, 'install', file)
66
-
67
- when Proc === get
68
- Open.write(path, get.call(file, path))
69
-
70
- when TSV === get
71
- Open.write(path, get.to_s)
99
# Registers how a data file (or a whole subdirectory) can be produced.
#
# file     - file name, path inside datadir, or nil / :all to claim the whole
#            datadir/subdir directory.
# get      - the producer: nil, a Proc, a TSV, a Rakefile name or a
#            String/URL (see #produce).
# subdir   - subdirectory of datadir (and of sharedir) for the file.
# sharedir - where bundled files live; defaults to
#            PKGData.get_caller_sharedir.
#
# The claim is stored in +claims+ under the resolved path. TSV producers are
# written out immediately.
def claim(file, get = nil, subdir = nil, sharedir = nil)
  file = case
         when (file.nil? or :all == file)
           File.join(datadir, subdir.to_s)
         when in_datadir?(file)
           file
         else
           File.join(datadir, subdir.to_s, file.to_s)
         end

  sharedir ||= PKGData.get_caller_sharedir
  claims[file] = {:get => get, :subdir => subdir, :sharedir => sharedir}
  produce(file, get, subdir, sharedir) if TSV === get
  # NOTE(review): the claim was stored just above, so reclaim(file) looks like
  # it can no longer be nil here and this eager production may never run --
  # confirm the intent before relying on it.
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  produce(file, get, subdir, sharedir) if String === get and not File.exist?(get) and reclaim(file).nil? and not File.basename(get.to_s) == "Rakefile"
end
72
114
 
73
- when String === get
74
- install_dir =File.expand_path(File.join(sharedir, 'install'))
75
- rake_dir = File.join(install_dir, File.dirname(get), file)
76
- rake_task = nil
77
-
78
- until rake_dir == install_dir
79
- return if run_rake(path, rake_dir, rake_task)
80
- rake_task = File.join(File.basename(rake_dir), rake_task || "")
81
- rake_dir = File.dirname(rake_dir)
82
- end
115
# Finds the claim that covers +file+.
#
# Starting at the expanded path of +file+, walks up its parent directories
# until a path registered in @claims is found or the data directory is
# reached.
#
# file - a path (anything responding to to_s).
#
# Returns [claimed_path, claim_info] or nil when the file is outside the data
# directory or no claim covers it.
def reclaim(file)
  path = File.expand_path(file.to_s)
  return nil unless in_datadir? path

  root = File.expand_path(datadir)
  until path == root
    info = @claims[path]
    return [path, info] if info

    parent = File.dirname(path)
    # The filesystem root is its own parent: stop instead of looping forever
    # if the walk ever escapes the data directory.
    return nil if parent == path
    path = parent
  end
  nil
end
91
127
 
92
- def add_datafiles(files = {})
93
- files.each do |file, info|
94
- subpath, get, sharedir = info
128
# Removes the claim stored in @claims under exactly +file+; does nothing when
# there is no such key.
+ def declaim(file)
129
+ @claims.delete file if @claims.include? file
130
+ end
95
131
 
96
- path = File.join(datadir, subpath.to_s, file.to_s)
132
# Produces +file+ by running a Rake task from +rakefile+.
#
# The task name is the path of +file+ relative to datadir/subdir, and Rake is
# run from that directory (see RakeHelper.run).
#
# rakefile - path of the Rakefile to load.
# subdir   - subdirectory of datadir the Rakefile works in; may be nil, in
#            which case datadir itself is used (nil used to raise TypeError).
# file     - path of the file to produce.
def produce_with_rake(rakefile, subdir, file)
  rake_dir = File.join(datadir, subdir.to_s)
  task = File.expand_path(file).sub(/^.*#{Regexp.quote(rake_dir)}\/?/, '')
  RakeHelper.run(rakefile, task, rake_dir)
end
97
136
 
98
- if not File.exists?(path)
99
- sharedir ||= PKGData.get_caller_sharedir
100
- get_datafile(file.to_s, path, get, sharedir)
101
- end
137
# Creates +file+ according to the kind of producer given in +get+.
#
# file     - destination path.
# get      - nil:    copy the file of the same name from sharedir/subdir.
#            Proc:   write the value returned by calling it.
#            TSV:    write its to_s.
#            String or Symbol whose basename is "Rakefile": run that Rakefile
#                    (a Symbol is looked up under sharedir/subdir, a String
#                    under sharedir).
#            String: read it with Open.read (path or URL) and write the
#                    content.
# subdir   - subdirectory used for the file inside datadir/sharedir; may be
#            nil.
# sharedir - directory holding the bundled files.
#
# Raises RuntimeError ("Unknown Get: <class>") for any other kind of +get+.
def produce(file, get, subdir, sharedir)
  Log.low "Getting data file '#{ file }' into '#{ subdir }'. Get: #{get.class}"

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  target_dir = File.dirname(file)
  FileUtils.mkdir_p target_dir unless File.exist?(target_dir)

  case
  when get.nil?
    FileUtils.cp File.join(sharedir, subdir.to_s, File.basename(file.to_s)), file.to_s
  when Proc === get
    Open.write(file, get.call)
  when TSV === get
    Open.write(file, get.to_s)
  when ((String === get or Symbol === get) and File.basename(get.to_s) == "Rakefile")
    rakefile = if Symbol === get
                 # subdir.to_s keeps a nil subdir from raising in File.join.
                 File.join(sharedir, subdir.to_s, get.to_s)
               else
                 File.join(sharedir, get.to_s)
               end
    produce_with_rake(rakefile, subdir, file)
  when String === get
    Open.write(file, Open.read(get, :wget_options => {:pipe => true}, :nocache => true))
  else
    raise "Unknown Get: #{get.class}"
  end
end
110
162
  end
@@ -0,0 +1,78 @@
1
+ require 'rbbt/util/tsv'
2
+ require 'rbbt/util/open'
3
+ require 'rbbt/util/log'
4
+
5
# Runs Rakefiles in a forked child process, so that loading a Rakefile never
# pollutes the task list or the working directory of the calling process.
module RakeHelper
  # Names of the file tasks defined since track_file_tasks was installed.
  # Always an Array, even when the Rakefile defines no file task.
  def self.recorded_files
    @recorded_files ||= []
  end

  # Patches Rake::FileTask.define_task (in the current process) so every file
  # task defined afterwards is remembered, and adds Rake::FileTask.files to
  # list them. The task *name* is recorded, so declarations with
  # prerequisites (`file "a" => "b"`) are listed as "a" rather than as a Hash.
  # Meant to be called inside the forked child, after requiring rake.
  def self.track_file_tasks
    Rake::FileTask.module_eval do
      class << self
        alias_method :old_define_task, :define_task

        def define_task(*args, &block)
          defined_task = old_define_task(*args, &block)
          RakeHelper.recorded_files << defined_task.name
          defined_task
        end

        def files
          RakeHelper.recorded_files
        end
      end
    end
  end

  # Lists the file tasks declared in +rakefile+.
  #
  # rakefile - path of the Rakefile to load.
  # task     - unused; kept for signature compatibility with RakeHelper.run.
  # chdir    - directory to change into before loading the Rakefile.
  #
  # Returns an Array of file task names.
  # Raises RuntimeError when the child process fails.
  def self.files(rakefile, task = :default, chdir = nil)
    # TmpFile is not loaded by this file's own requires.
    require 'rbbt/util/tmpfile'

    status = nil
    files = nil
    TmpFile.with_file do |f|
      pid = Process.fork{
        require 'rake'
        FileUtils.chdir chdir if chdir

        track_file_tasks

        load rakefile

        Open.write(f, Rake::FileTask.files * "\n")
        exit
      }

      pid, status = Process.waitpid2(pid)
      # Only read the result when the child succeeded; otherwise the file may
      # not exist and the real error would be hidden by a read failure.
      files = Open.read(f).split("\n") if status.success?
    end
    raise "Error getting files from Rake: #{ rakefile } " unless status.success?
    files
  end

  # Runs +task+ from +rakefile+ in a child process.
  #
  # A :default task that invokes every declared file task is added, so running
  # the default task builds all the files.
  #
  # rakefile - path of the Rakefile to load.
  # task     - name of the task to invoke (default :default).
  # chdir    - directory to change into before loading the Rakefile.
  #
  # Raises RuntimeError when the child process fails.
  def self.run(rakefile, task = :default, chdir = nil)
    pid = Process.fork{
      require 'rake'
      FileUtils.chdir chdir if chdir

      track_file_tasks

      load rakefile

      # Rake::Task.define_task is used instead of the `task` DSL method, which
      # recent Rake versions only make available on the top-level object.
      Rake::Task.define_task(:default) do |t|
        Rake::FileTask.files.each do |file| Rake::Task[file].invoke end
      end

      Rake::Task[task].invoke
      exit
    }
    pid, status = Process.waitpid2(pid)

    raise "Error in Rake: #{ rakefile } => #{ task }" unless status.success?
  end
end
77
+
78
+