rbbt-util 1.1.0 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,6 @@
1
- require 'rbbt/util/tsv'
2
- require 'rbbt/util/open'
3
1
  require 'spreadsheet'
4
-
2
+ require 'rbbt/util/tsv'
3
+ require 'rbbt/util/tmpfile'
5
4
  class TSV
6
5
  def self.excel2tsv(file, options = {})
7
6
  sheet = options.delete :sheet
data/lib/rbbt/util/log.rb CHANGED
@@ -48,3 +48,8 @@ module Log
48
48
  @@severity = ENV['RBBT_LOG'].to_i
49
49
  end
50
50
  end
51
+
52
# Development helper: logs the call site and the inspected value through
# Log.debug.
#
# message - any object; its #inspect representation is logged.
# file    - accepted for backward compatibility; this method does not use it.
#
# Returns the result of the second Log.debug call.
def ddd(message, file = $stdout)
  # Interpolation instead of `<<`: it does not mutate a string literal and it
  # tolerates a nil entry from caller.first.
  Log.debug "DEVEL => #{caller.first}"
  Log.debug "DEVEL: #{message.inspect}"
end
@@ -80,6 +80,14 @@ module Misc
80
80
  new_options
81
81
  end
82
82
 
83
# Removes the given option keys from +hash+ and returns their values.
#
# hash - Hash of options; it is modified in place.
# keys - one or more key names (Symbols or Strings).
#
# Each key is looked up first as a Symbol and, when that yields nil or false,
# as a String. The single-key form used to skip the String fallback; both
# forms now behave the same way.
#
# Returns the bare value when exactly one key is given, otherwise an Array of
# values in the order of +keys+ (an empty Array when no keys are given).
def self.process_options(hash, *keys)
  values = keys.map do |key|
    hash.delete(key.to_sym) || hash.delete(key.to_s)
  end

  keys.length == 1 ? values.first : values
end
90
+
83
91
  def self.string2hash(string)
84
92
 
85
93
  options = {}
@@ -91,7 +99,7 @@ module Misc
91
99
  end
92
100
 
93
101
  option = option.sub(":",'').to_sym if option.chars.first == ':'
94
-
102
+ value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
95
103
 
96
104
  if value == true
97
105
  options[option] = option.to_s.chars.first != '!'
@@ -129,10 +137,18 @@ module Misc
129
137
  fields.each_with_index{|f,i| return i if f =~ field_re}
130
138
  raise FieldNotFoundError, "Field '#{ field }' was not found" unless quiet
131
139
  end
140
+
141
# Nil-tolerant head accessor.
#
# list - nil, or any object responding to #first.
#
# Returns nil when +list+ is nil, otherwise list.first.
def self.first(list)
  list.nil? ? nil : list.first
end
145
+
132
146
  end
133
147
 
134
148
  module PDF2Text
135
149
  def self.pdf2text(filename)
150
+ require 'rbbt/util/cmd'
151
+ require 'rbbt/util/tmpfile'
136
152
  TmpFile.with_file(Open.read(filename)) do |pdf|
137
153
  CMD.cmd("pdftotext #{pdf} -", :pipe => false, :stderr => true)
138
154
  end
@@ -169,8 +185,20 @@ class NamedArray < Array
169
185
  keys = keys.collect{|k| Misc.field_position(fields, k) }
170
186
  original_values_at(*keys)
171
187
  end
188
+
189
# Transposes the receiver: element i of the result collects the i-th entry
# of every member, driven by the length of the first member (Array#zip).
#
# When +fields+ is set, every resulting row is passed through
# NamedArray.name(row, fields).
#
# Returns an Array of rows; an empty receiver yields [] instead of failing
# on a nil first element.
def zip_fields
  return [] if empty?

  zipped = self[0].zip(*self[1..-1])
  zipped = zipped.collect { |v| NamedArray.name(v, fields) } if fields
  zipped
end
172
194
  end
173
195
 
196
# Runs the given block once under Benchmark.measure and prints the timing
# report with puts.
#
# Returns nil (the result of puts).
def benchmark
  require 'benchmark'
  timing = Benchmark.measure { yield }
  puts(timing)
end
174
202
 
175
203
  def profile
176
204
  require 'ruby-prof'
@@ -146,6 +146,7 @@ module Open
146
146
  !! (file =~ /\.zip/)
147
147
  end
148
148
 
149
+
149
150
  # Open Read Write
150
151
 
151
152
  def self.open(url, options = {})
@@ -0,0 +1,109 @@
1
+ require 'rbbt/util/misc'
2
+ require 'rbbt/util/open'
3
+ require 'yaml'
4
+
5
# Caches the result of an expensive block on disk, keyed by the source file
# name, a prefix and the options used to produce it.
module Persistence
  # Digest::MD5 and FileUtils are used below; require them here so the module
  # does not depend on another file having loaded them first.
  require 'digest/md5'
  require 'fileutils'
  require 'rbbt/util/tc_hash'
  TSV = TCHash

  # Attributes copied between the produced data and its persisted copy.
  TSV_ATTRIBUTES = %w(case_insensitive fields key_field type filename)

  CACHEDIR="/tmp/tsv_persistent_cache"
  FileUtils.mkdir CACHEDIR unless File.exist? CACHEDIR

  # Points the cache at +cachedir+ (creating it if needed). The CACHEDIR
  # string is replaced in place so the constant itself is never reassigned.
  def self.cachedir=(cachedir)
    CACHEDIR.replace cachedir
    FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
  end

  # Returns the directory where persistence files are stored.
  def self.cachedir
    CACHEDIR
  end

  # Builds the cache path for +file+: the prefix (whitespace turned into '_',
  # '/' turned into '>') followed by the MD5 of [file, options].inspect.
  def self.get_persistence_file(file, prefix, options = {})
    File.join(CACHEDIR, prefix.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
  end

  # Returns the cached result for +file+, or yields to build and store it.
  #
  # file             - a path String, a File, a Persistence::TSV, or any other
  #                    object (identified by the MD5 of its #inspect).
  # prefix           - String prepended to the cache file name.
  # persistence_type - :tsv, :string, :marshal or :yaml.
  # options          - Hash; :persistence (default true), :persistence_file and
  #                    :filename are consumed here, the rest is passed on.
  #
  # When persistence is enabled and no cache file exists, the block is called
  # with (file, options, filename, persistence_file) and may return either the
  # data or a [data, extra_hash_or_nil] pair. When a cache file exists the
  # block is not called. Both cases return [data, extra]. An unrecognized
  # persistence_type on the load path returns nil.
  #
  # When persistence is disabled, the block is called with (file, options)
  # and its result is returned as is.
  def self.persist(file, prefix = "", persistence_type = :string, options = {})
    options = Misc.add_defaults options, :persistence => true

    persistence, persistence_file =
      Misc.process_options options, :persistence, :persistence_file

    filename = Misc.process_options options, :filename
    filename ||= case
                 when (String === file and File.exist? file)
                   File.expand_path file
                 when File === file
                   File.expand_path file.path
                 # NOTE(review): inside this module TSV is Persistence::TSV
                 # (TCHash), not Object::TSV -- confirm this is intended.
                 when TSV === file
                   file.filename
                 else
                   Digest::MD5.hexdigest(file.inspect)
                 end

    return yield(file, options) unless persistence

    persistence_file ||= get_persistence_file(filename, prefix, options)

    if File.exist? persistence_file
      #{{{ LOAD
      Log.low "Opening Persistence #{ persistence_file } for #{ filename }"
      case persistence_type.to_sym
      when :tsv
        data = Persistence::TSV.get persistence_file

        extra = {}
        TSV_ATTRIBUTES.each { |key| extra[key.to_sym] = data.send key.to_sym }

        return [data, extra]
      when :string
        # Read the cache file itself; this used to read the type name.
        return [Open.read(persistence_file), nil]
      when :marshal
        return [File.open(persistence_file){|f| Marshal.load(f)}, nil]
      when :yaml
        return [File.open(persistence_file){|f| YAML.load(f)}, nil]
      end
    else
      #{{{ CREATE
      Log.low "Creating Persistence #{ persistence_file } for #{ filename }"
      res = yield file, options, filename, persistence_file

      # The block may return [data, extra] or just data.
      if Array === res and res.length == 2 and (Hash === res[1] or res[1].nil?)
        data, extra = res
      else
        data, extra = [res, nil]
      end

      case persistence_type.to_sym
      when :tsv
        if Hash === data or Object::TSV === data
          Log.debug "Creating #{Persistence::TSV} for #{ persistence_file }"
          per = Persistence::TSV.get persistence_file
          per.write
          data.each{|k,v| per[k.to_s] = v}
          TSV_ATTRIBUTES.each do |key|
            value = if data.respond_to? key
                      data.send(key.to_sym)
                    else
                      # extra is nil when the block returned only the data.
                      extra.nil? ? nil : extra[key.to_sym]
                    end
            per.send "#{key}=".to_sym, value
          end
          per.read

          data = per
        end
      when :string
        Open.write(persistence_file, data.to_s)
      when :marshal
        Open.write(persistence_file, Marshal.dump(data))
      when :yaml
        Open.write(persistence_file, YAML.dump(data))
      end

      return [data, extra]
    end
  end
end
@@ -1,10 +1,60 @@
1
1
  require 'rbbt/util/open'
2
2
  require 'rbbt/util/tsv'
3
3
  require 'rbbt/util/log'
4
- require 'rake'
4
+ require 'rbbt/util/rake'
5
5
 
6
6
  module PKGData
7
- FILES = {} unless defined? FILES
7
+ attr_accessor :claims
8
# Hook run when an object extends this module: gives the extending object
# its own, initially empty, claims table.
def self.extended(base)
  base.claims = Hash.new
end
11
+
12
+ module Path
13
+ attr_accessor :base
14
+
15
+ def method_missing(name, *args, &block)
16
+ new = File.join(self.dup, name.to_s)
17
+ new.extend Path
18
+ new.base = base
19
+ new
20
+ end
21
+
22
+ def [](name)
23
+ new = File.join(self.dup, name.to_s)
24
+ new.extend Path
25
+ new.base = base
26
+ new
27
+ end
28
+
29
+ def tsv(options = {})
30
+ produce
31
+ TSV.new self, options
32
+ end
33
+
34
+ def index(field = nil, other = nil, options = {})
35
+ produce
36
+ TSV.index self, options.merge(:target => field, :others => other)
37
+ end
38
+
39
+ def open
40
+ produce
41
+ Open.open(self)
42
+ end
43
+
44
+ def read
45
+ produce
46
+ Open.read(self)
47
+ end
48
+
49
+ def produce
50
+ return if File.exists? self
51
+
52
+ Log.debug("Trying to produce '#{ self }'")
53
+ file, producer = base.reclaim self
54
+ base.produce(self, producer[:get], producer[:subdir], producer[:sharedir])
55
+ end
56
+ end
57
+
8
58
  class SharedirNotFoundError < StandardError; end
9
59
 
10
60
  def self.sharedir_for_file(file = __FILE__)
@@ -32,79 +82,81 @@ module PKGData
32
82
  raise SharedirNotFoundError
33
83
  end
34
84
 
35
- def run_rake(path, dir, task = nil)
36
- rakefile = File.join(dir, 'Rakefile')
37
- return nil unless File.exists? rakefile
38
- if task.nil?
39
- task ||= :default
40
- else
41
- task.sub!(/\/$/,'') if String === task
42
- path = File.dirname(path)
43
- end
85
# Returns a Path rooted at a copy of +datadir+, with this object as its base,
# so data files can be addressed as files.some.sub.path.
def files
  root = datadir.dup
  root.extend Path
  root.base = self
  root
end
44
90
 
45
- load rakefile
46
- old_dir = FileUtils.pwd
47
- begin
48
- FileUtils.mkdir_p path
49
- FileUtils.chdir path
50
- Rake::Task[task].invoke
51
- Rake::Task[task].reenable
52
- ensure
53
- FileUtils.chdir old_dir
91
+ def in_datadir?(file)
92
+ if File.expand_path(file.to_s) =~ /^#{Regexp.quote File.expand_path(datadir)}/
93
+ true
94
+ else
95
+ false
54
96
  end
55
- true
56
97
  end
57
98
 
58
- def get_datafile(file, path, get, sharedir)
59
- Log.log "Getting data file '#{ file }' into '#{ path }'. Get: #{get.to_s}"
60
-
61
- FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
62
-
63
- case
64
- when get.nil?
65
- load File.join(sharedir, 'install', file)
66
-
67
- when Proc === get
68
- Open.write(path, get.call(file, path))
69
-
70
- when TSV === get
71
- Open.write(path, get.to_s)
99
# Registers how +file+ can be produced and, for some kinds of +get+,
# produces it immediately.
#
# file     - nil or :all to claim the whole subdir; a path already inside
#            datadir is used as is; anything else is placed under
#            datadir/subdir.
# get      - producer description stored in the claim. A TSV is produced at
#            once; so is a String that is not an existing file, is not named
#            "Rakefile", and for which reclaim(file) finds nothing.
# subdir   - sub directory of datadir (and of sharedir) for the file.
# sharedir - defaults to PKGData.get_caller_sharedir.
#
# Returns the value of the last produce call, or nil when nothing was
# produced by the final check.
def claim(file, get = nil, subdir = nil, sharedir = nil)
  file = case
         when (file.nil? or file === :all)
           File.join(datadir, subdir.to_s)
         when in_datadir?(file)
           file
         else
           File.join(datadir, subdir.to_s, file.to_s)
         end

  # NOTE(review): get_caller_sharedir presumably inspects the call stack, so
  # this call is kept directly in claim rather than behind a helper -- confirm.
  sharedir ||= PKGData.get_caller_sharedir
  claims[file] = {:get => get, :subdir => subdir, :sharedir => sharedir}

  produce(file, get, subdir, sharedir) if TSV === get

  # File.exist? replaces File.exists?, which Ruby 3.2 removed.
  produce(file, get, subdir, sharedir) if String === get and not File.exist?(get) and reclaim(file).nil? and not File.basename(get.to_s) == "Rakefile"
end
72
114
 
73
- when String === get
74
- install_dir =File.expand_path(File.join(sharedir, 'install'))
75
- rake_dir = File.join(install_dir, File.dirname(get), file)
76
- rake_task = nil
77
-
78
- until rake_dir == install_dir
79
- return if run_rake(path, rake_dir, rake_task)
80
- rake_task = File.join(File.basename(rake_dir), rake_task || "")
81
- rake_dir = File.dirname(rake_dir)
82
- end
115
+ def reclaim(file)
116
+ file = File.expand_path(file.dup)
117
+ return nil unless in_datadir? file
83
118
 
84
- if (File.exists?(File.join(sharedir, get)) and not File.directory?(File.join(sharedir, get)))
85
- Open.write(path, Open.open(File.join(sharedir, get)))
86
- else
87
- Open.write(path, Open.open(get, :wget_options => {:pipe => true}, :nocache => true))
119
+ while file != File.expand_path(datadir)
120
+ if @claims[file]
121
+ return [file, @claims[file]]
88
122
  end
123
+ file = File.dirname(file)
89
124
  end
125
+ nil
90
126
  end
91
127
 
92
- def add_datafiles(files = {})
93
- files.each do |file, info|
94
- subpath, get, sharedir = info
128
# Drops the claim registered for exactly +file+, if there is one.
#
# Returns the removed claim, or nil when +file+ was not claimed.
def declaim(file)
  @claims.delete(file)
end
95
131
 
96
- path = File.join(datadir, subpath.to_s, file.to_s)
132
# Produces +file+ by running a Rake task from +rakefile+.
#
# rakefile - path of the Rakefile to load.
# subdir   - sub directory of datadir the Rakefile works in; nil is treated
#            as the datadir itself (subdir.to_s, as claim does).
# file     - path of the file to produce.
#
# The task name is the expanded path of +file+ with everything up to and
# including datadir/subdir stripped; Rake is run inside datadir/subdir.
#
# Returns the result of RakeHelper.run.
def produce_with_rake(rakefile, subdir, file)
  workdir = File.join(datadir, subdir.to_s)
  task = File.expand_path(file).sub(/^.*#{Regexp.quote(workdir)}\/?/, '')
  RakeHelper.run(rakefile, task, workdir)
end
97
136
 
98
- if not File.exists?(path)
99
- sharedir ||= PKGData.get_caller_sharedir
100
- get_datafile(file.to_s, path, get, sharedir)
101
- end
137
+ def produce(file, get, subdir, sharedir)
138
+ Log.low "Getting data file '#{ file }' into '#{ subdir }'. Get: #{get.class}"
102
139
 
103
- FILES[file.to_s] = path
104
- end
105
- end
140
+ FileUtils.mkdir_p File.dirname(file) unless File.exists?(File.dirname(file))
106
141
 
107
- def find_datafile(file)
108
- FILES[file.to_s]
142
+ case
143
+ when get.nil?
144
+ FileUtils.cp File.join(sharedir, subdir.to_s, File.basename(file.to_s)), file.to_s
145
+ when Proc === get
146
+ Open.write(file, get.call)
147
+ when TSV === get
148
+ Open.write(file, get.to_s)
149
+ when ((String === get or Symbol === get) and File.basename(get.to_s) == "Rakefile")
150
+ if Symbol === get
151
+ rakefile = File.join(sharedir, subdir, get.to_s)
152
+ else
153
+ rakefile = File.join(sharedir, get.to_s)
154
+ end
155
+ produce_with_rake(rakefile, subdir, file)
156
+ when String === get
157
+ Open.write(file, Open.read(get, :wget_options => {:pipe => true}, :nocache => true))
158
+ else
159
+ raise "Unknown Get: #{get.class}"
160
+ end
109
161
  end
110
162
  end
@@ -0,0 +1,78 @@
1
+ require 'rbbt/util/tsv'
2
+ require 'rbbt/util/open'
3
+ require 'rbbt/util/log'
4
+
5
# Runs Rakefiles in a forked child process so that loading them does not
# affect the current process.
module RakeHelper
  # Lists the names of the file tasks defined by +rakefile+.
  #
  # rakefile - path of the Rakefile to load.
  # task     - accepted for signature symmetry with .run; not used.
  # chdir    - directory to change into before loading, or nil.
  #
  # Returns an Array of task name Strings.
  # Raises RuntimeError when the child process does not exit successfully.
  def self.files(rakefile, task = :default, chdir = nil)
    status = nil
    files = nil
    TmpFile.with_file do |f|
      pid = Process.fork{
        require 'rake'
        FileUtils.chdir chdir if chdir

        track_file_tasks

        load rakefile

        Open.write(f, Rake::FileTask.files * "\n")
        exit
      }

      pid, status = Process.waitpid2(pid)
      # Only read the list when the child succeeded; otherwise the error
      # raised below is the one the caller should see.
      files = Open.read(f).split("\n") if status.success?
    end
    raise "Error getting files from Rake: #{ rakefile } " unless status.success?
    files
  end

  # Loads +rakefile+ in a child process and invokes +task+.
  #
  # A :default task action is added that invokes every file task the
  # Rakefile defined.
  #
  # rakefile - path of the Rakefile to load.
  # task     - name of the task to invoke.
  # chdir    - directory to change into before loading, or nil.
  #
  # Returns nil.
  # Raises RuntimeError when the child process does not exit successfully.
  def self.run(rakefile, task = :default, chdir = nil)
    pid = Process.fork{
      require 'rake'
      FileUtils.chdir chdir if chdir

      track_file_tasks

      load rakefile

      # Rake::Task.define_task is what the `task` DSL method calls; using it
      # directly does not depend on the DSL being mixed into this module.
      Rake::Task.define_task(:default) do |t|
        Rake::FileTask.files.each do |file| Rake::Task[file].invoke end
      end

      Rake::Task[task].invoke
      exit
    }
    pid, status = Process.waitpid2(pid)

    raise "Error in Rake: #{ rakefile } => #{ task }" unless status.success?
  end

  # Patches Rake::FileTask so every file task defined afterwards is recorded
  # and can be listed with Rake::FileTask.files. Meant to run in the forked
  # child, after rake is loaded and before the Rakefile.
  def self.track_file_tasks
    Rake::FileTask.module_eval do
      class << self
        alias_method :old_define_task, :define_task
      end

      def self.define_task(file, *args, &block)
        @@files ||= []
        defined_task = old_define_task(file, *args, &block)
        # Record the resolved task name: the raw argument is a Hash for the
        # `file "target" => [deps]` form.
        @@files << defined_task.name
        defined_task
      end

      def self.files
        # Empty list (not an uninitialized class variable) when the Rakefile
        # defined no file tasks.
        @@files ||= []
      end
    end
  end
  private_class_method :track_file_tasks
end
77
+
78
+