rbbt-util 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/rbbt/util/cmd.rb CHANGED
@@ -3,20 +3,27 @@ require 'rbbt/util/log'
3
3
  require 'stringio'
4
4
 
5
5
  module CMD
6
- class CMDError < StandardError;end
6
+ class CMDError < RBBTError;end
7
7
 
8
8
  module SmartIO
9
- def self.tie(io, pid = nil, post = nil)
9
+ def self.tie(io, pid = nil, cmd = "", post = nil)
10
10
  io.instance_eval{
11
11
  @pid = pid
12
+ @cmd = cmd
12
13
  @post = post
13
14
  alias original_close close
14
15
  def close
15
16
  begin
16
- Process.waitpid(@pid, Process::WNOHANG) if @pid
17
+ Process.waitpid(@pid) if @pid
17
18
  rescue
18
19
  end
19
20
 
21
+ if $? and not $?.success?
22
+ Log.debug "Raising exception"
23
+ exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
24
+ raise exception
25
+ end
26
+
20
27
  @post.call if @post
21
28
  original_close
22
29
  end
@@ -107,6 +114,8 @@ module CMD
107
114
  sout.last.close
108
115
  serr.last.close
109
116
 
117
+ Log.debug "CMD: [#{pid}] #{cmd}"
118
+
110
119
  case
111
120
  when String === in_content
112
121
  sin.last.write in_content
@@ -120,20 +129,39 @@ module CMD
120
129
  end
121
130
  end
122
131
 
123
- Thread.new do
124
- while l = serr.first.gets
125
- Log.log l, stderr if Integer === stderr
132
+ if pipe
133
+ Thread.new do
134
+ while l = serr.first.gets
135
+ Log.log l, stderr if Integer === stderr
136
+ end
137
+ serr.first.close
126
138
  end
127
- serr.first.close
128
- end
129
139
 
130
- if pipe
131
- SmartIO.tie sout.first, pid, post
140
+ SmartIO.tie sout.first, pid, cmd, post
132
141
  sout.first
142
+
133
143
  else
144
+ err = ""
145
+ Thread.new do
146
+ while l = serr.first.gets
147
+ err << l if Integer === stderr
148
+ end
149
+ serr.first.close
150
+ end
151
+
134
152
  out = StringIO.new sout.first.read
135
- SmartIO.tie out
153
+ SmartIO.tie out, pid, cmd, post
154
+
136
155
  Process.waitpid pid
156
+
157
+ if not $?.success?
158
+ exception = CMDError.new "Command [#{pid}] #{cmd} failed with error status #{$?.exitstatus}"
159
+ exception.info = err if Integer === stderr and stderr >= Log.severity
160
+ raise exception
161
+ else
162
+ Log.log err, stderr if Integer === stderr
163
+ end
164
+
137
165
  out
138
166
  end
139
167
  end
@@ -38,6 +38,7 @@ module DataModule
38
38
  pkg_module.add_datafiles filename => ['', self.to_s, sharedir]
39
39
  rescue
40
40
  Log.debug $!.message
41
+ Log.debug $!.backtrace * "\n"
41
42
  old_method_missing name, *args, &block
42
43
  end
43
44
 
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/tsv'
2
+ require 'rbbt/util/open'
2
3
  require 'spreadsheet'
3
4
 
4
5
  class TSV
@@ -8,7 +9,7 @@ class TSV
8
9
  header = true unless header == false
9
10
  sheet ||= 0
10
11
  TmpFile.with_file do |filename|
11
- workbook = Spreadsheet.open File.open(file)
12
+ workbook = Spreadsheet.open Open.open(file)
12
13
  sheet = workbook.worksheet sheet
13
14
 
14
15
  rows = []
data/lib/rbbt/util/log.rb CHANGED
@@ -5,11 +5,11 @@ module Log
5
5
  MEDIUM = 2
6
6
  HIGH = 3
7
7
 
8
- def severity=(severity)
8
+ def self.severity=(severity)
9
9
  @@severity = severity
10
10
  end
11
11
 
12
- def severity
12
+ def self.severity
13
13
  @@severity
14
14
  end
15
15
 
@@ -1,7 +1,25 @@
1
1
  require 'iconv'
2
+
3
+ class RBBTError < StandardError
4
+ attr_accessor :info
5
+
6
+ alias old_to_s to_s
7
+ def to_s
8
+ str = old_to_s
9
+ if info
10
+ str << "\n" << "Additional Info:\n---\n" << info << "---"
11
+ end
12
+ str
13
+ end
14
+ end
15
+
2
16
  module Misc
3
17
  class FieldNotFoundError < StandardError;end
4
18
 
19
+ def self.this_dir
20
+ File.expand_path(File.dirname(caller[0]))
21
+ end
22
+
5
23
  def self.env_add(var, value, sep = ":", prepend = true)
6
24
  ENV[var] ||= ""
7
25
  return if ENV[var] =~ /(#{sep}|^)#{Regexp.quote value}(#{sep}|$)/
@@ -113,6 +131,14 @@ module Misc
113
131
  end
114
132
  end
115
133
 
134
+ module PDF2Text
135
+ def self.pdf2text(filename)
136
+ TmpFile.with_file(Open.read(filename)) do |pdf|
137
+ CMD.cmd("pdftotext #{pdf} -", :pipe => false, :stderr => true)
138
+ end
139
+ end
140
+ end
141
+
116
142
  class NamedArray < Array
117
143
  attr_accessor :fields
118
144
 
@@ -34,6 +34,7 @@ module Open
34
34
  end
35
35
 
36
36
  def self.wget(url, options = {})
37
+ Log.low "WGET:\n -URL: #{ url }\n -OPTIONS: #{options.inspect}"
37
38
  options = Misc.add_defaults options, "--user-agent=" => 'firefox', :pipe => true
38
39
 
39
40
  wait(options[:nice], options[:nice_key]) if options[:nice]
@@ -42,7 +43,18 @@ module Open
42
43
 
43
44
  pipe = options.delete(:pipe)
44
45
  quiet = options.delete(:quiet)
45
- options["--quiet"] = quiet if options["--quiet"].nil?
46
+ post = options.delete(:post)
47
+ cookies = options.delete(:cookies)
48
+
49
+ options["--quiet"] = quiet if options["--quiet"].nil?
50
+ options["--post-data="] ||= post if post
51
+
52
+ if cookies
53
+ options["--save-cookies"] = cookies
54
+ options["--load-cookies"] = cookies
55
+ options["--keep-session-cookies"] = true
56
+ end
57
+
46
58
 
47
59
  stderr = case
48
60
  when options['stderr']
@@ -52,6 +64,7 @@ module Open
52
64
  else
53
65
  nil
54
66
  end
67
+
55
68
  begin
56
69
  CMD.cmd("wget '#{ url }'", options.merge(
57
70
  '-O' => '-',
@@ -141,6 +154,9 @@ module Open
141
154
  wget_options = options[:wget_options] || {}
142
155
  wget_options[:nice] = options.delete(:nice)
143
156
  wget_options[:nice_key] = options.delete(:nice_key)
157
+ wget_options[:quiet] = options.delete(:quiet)
158
+ wget_options[:post] = options.delete(:post)
159
+ wget_options[:cookies] = options.delete(:cookies)
144
160
 
145
161
  io = case
146
162
  when (not remote?(url))
@@ -155,8 +171,8 @@ module Open
155
171
  io.close
156
172
  file_open(in_cache(url), options[:grep])
157
173
  end
158
- io = unzip(io) if zip? url and not options[:noz]
159
- io = gunzip(io) if gzip? url and not options[:noz]
174
+ io = unzip(io) if (zip? url and not options[:noz]) or options[:zip]
175
+ io = gunzip(io) if (gzip? url and not options[:noz]) or options[:gzip]
160
176
 
161
177
  io
162
178
  end
@@ -70,6 +70,7 @@ module PKGSoftware
70
70
  if not File.exists?(path)
71
71
  sharedir ||= PKGSoftware.get_caller_sharedir
72
72
  get_pkg(pkg.to_s, path, get, sharedir)
73
+ setup_env(software_dir)
73
74
  end
74
75
 
75
76
  SOFTWARE[pkg.to_s] = path
@@ -80,7 +81,6 @@ module PKGSoftware
80
81
  SOFTWARE[pkg.to_s]
81
82
  end
82
83
 
83
-
84
84
  def setup_env(software_dir)
85
85
  Misc.env_add 'PATH', bin_dir
86
86
 
@@ -127,4 +127,5 @@ module PKGSoftware
127
127
 
128
128
  CMD.cmd(File.join(opt_dir, '.post_install'))
129
129
  end
130
+
130
131
  end
@@ -39,7 +39,7 @@ class TCHash < TokyoCabinet::HDB
39
39
  alias original_keys keys
40
40
  def keys
41
41
  list = self.original_keys
42
- indexes = FIELD_INFO_ENTRIES.values.collect do |field| list.index(field) end.compact
42
+ indexes = FIELD_INFO_ENTRIES.values.collect do |field| list.index(field) end.compact.sort.reverse
43
43
  indexes.each do |index| list.delete_at index end
44
44
  list
45
45
  end
@@ -48,19 +48,12 @@ class TCHash < TokyoCabinet::HDB
48
48
  def values
49
49
  values = self.original_values
50
50
  keys = self.original_keys
51
- indexes = FIELD_INFO_ENTRIES.values.collect do |field| keys.index(field) end.compact
51
+ indexes = FIELD_INFO_ENTRIES.values.collect do |field| keys.index(field) end.compact.sort.reverse
52
52
  indexes.each do |index| values.delete_at index end
53
53
 
54
54
  values.collect{|v| Serializer.load(v)}
55
55
  end
56
56
 
57
- def merge!(data)
58
- new_data = {}
59
- data.each do |key, values|
60
- self[key] = values
61
- end
62
- end
63
-
64
57
  # This version of each fixes a problem in ruby 1.9. It also
65
58
  # removes the special entries
66
59
  def each19(&block)
@@ -77,10 +70,17 @@ class TCHash < TokyoCabinet::HDB
77
70
 
78
71
  def collect
79
72
  res = []
80
- self.each{|k, v| res << [k,v]}
73
+ self.each{|k, v| res << yield(k,v)}
81
74
  res
82
75
  end
83
76
 
77
+ def merge!(data)
78
+ new_data = {}
79
+ data.each do |key, values|
80
+ self[key] = values
81
+ end
82
+ end
83
+
84
84
  alias original_open open
85
85
  def open(write = false)
86
86
  flags = write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER
data/lib/rbbt/util/tsv.rb CHANGED
@@ -2,6 +2,7 @@ require 'rbbt/util/misc'
2
2
  require 'rbbt/util/open'
3
3
  require 'rbbt/util/tc_hash'
4
4
  require 'rbbt/util/tmpfile'
5
+ require 'rbbt/util/log'
5
6
  require 'digest'
6
7
  require 'fileutils'
7
8
 
@@ -16,6 +17,13 @@ end
16
17
  class TSV
17
18
  class FieldNotFoundError < StandardError;end
18
19
 
20
+ module Field
21
+ def ==(string)
22
+ return false unless String === string
23
+ self.sub(/#.*/,'').casecmp(string.sub(/#.*/,'')) == 0
24
+ end
25
+ end
26
+
19
27
  #{{{ Persistence
20
28
 
21
29
  PersistenceHash = TCHash
@@ -36,14 +44,7 @@ class TSV
36
44
  File.join(CACHEDIR, prefix.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
37
45
  end
38
46
 
39
- @debug = ENV['TSV_DEBUG'] == "true"
40
- def self.log(message)
41
- STDERR.puts message if @debug == true
42
- end
43
-
44
- def self.debug=(value)
45
- @debug = value
46
- end
47
+ #{{{ Headers and Field Stuff
47
48
 
48
49
  def self.headers(file, options = {})
49
50
  if file =~ /(.*)#(.*)/ and File.exists? $1
@@ -63,742 +64,850 @@ class TSV
63
64
  end
64
65
  end
65
66
 
66
- #{{{ Accesor Methods
67
-
68
- def keys
69
- @data.keys
67
+ def self.fields_include(key_field, fields, field)
68
+ return true if key_field == field or fields.include? field
69
+ return false
70
70
  end
71
71
 
72
- def values
73
- @data.values
72
+ def self.field_positions(key_field, fields, *selected)
73
+ selected.collect do |sel|
74
+ case
75
+ when (sel.nil? or sel == :main or sel == key_field)
76
+ -1
77
+ when Integer === sel
78
+ sel
79
+ else
80
+ Misc.field_position fields, sel
81
+ end
82
+ end
74
83
  end
75
84
 
76
- def size
77
- @data.size
85
+ def fields_include(field)
86
+ return TSV.fields_include key_field, fields, field
78
87
  end
79
88
 
80
- # Write
89
+ def field_positions(*selected)
90
+ return nil if selected.nil? or selected == [nil]
91
+ TSV.field_positions(key_field, fields, *selected)
92
+ end
81
93
 
82
- def []=(key, value)
83
- key = key.downcase if @case_insensitive
84
- @data[key] = value
94
+ def fields_at(*positions)
95
+ return nil if fields.nil?
96
+ return nil if positions.nil? or positions == [nil]
97
+ (fields + [key_field]).values_at(*positions)
85
98
  end
86
99
 
100
+ #{{{ Iteration, Merging, etc
101
+ def through(new_key_field = nil, new_fields = nil, &block)
102
+ new_key_position = (field_positions(new_key_field) || [-1]).first
103
+ new_fields = [new_fields] if String === new_fields
87
104
 
88
- def merge!(new_data)
89
- new_data.each do |key, value|
90
- self[key] = value
91
- end
92
- end
105
+ if new_key_position == -1
93
106
 
94
- # Read
107
+ if new_fields.nil? or new_fields == fields
108
+ each &block
109
+ return [key_field, fields]
110
+ else
111
+ new_field_positions = field_positions(*new_fields)
112
+ each do |key, values|
113
+ if values.nil?
114
+ yield key, nil
115
+ else
116
+ yield key, values.values_at(*new_field_positions)
117
+ end
118
+ end
119
+ return [key_field, fields_at(*new_field_positions)]
120
+ end
95
121
 
96
- def follow(value)
97
- if String === value && value =~ /__Ref:(.*)/
98
- return self[$1]
99
122
  else
100
- value = NamedArray.name value, fields if Array === value and fields
101
- value
123
+ new_field_positions = field_positions(*new_fields)
124
+
125
+ new_field_names = fields_at(*new_field_positions)
126
+ if new_field_names.nil? and fields
127
+ new_field_names = fields.dup
128
+ new_field_names.delete_at new_key_position
129
+ new_field_names.unshift key_field
130
+ end
131
+
132
+ each do |key, values|
133
+ if list
134
+ tmp_values = values + [[key]]
135
+ else
136
+ tmp_values = values + [key]
137
+ end
138
+
139
+ if new_field_positions.nil?
140
+ new_values = values.dup
141
+ new_values.delete_at new_key_position
142
+ new_values.unshift [key]
143
+ else
144
+ new_values = tmp_values.values_at(*new_field_positions)
145
+ end
146
+
147
+ tmp_values[new_key_position].each do |new_key|
148
+ if new_field_names
149
+ yield new_key, NamedArray.name(new_values, new_field_names)
150
+ else
151
+ yield new_key, new_values
152
+ end
153
+ end
154
+ end
155
+ return [(fields_at(new_key_position) || [nil]).first, new_field_names]
102
156
  end
103
157
  end
104
- def [](key)
105
- if Array === key
106
- return @data[key] if @data[key] != nil
107
- key.each{|k| v = self[k]; return v unless v.nil?}
108
- return nil
158
+
159
+ def process(field)
160
+ through do |key, values|
161
+ values[field].replace yield(values[field], key, values) unless values[field].nil?
109
162
  end
110
-
111
- key = key.downcase if @case_insensitive
112
- follow @data[key]
113
163
  end
114
164
 
115
- def values_at(*keys)
116
- keys.collect{|k|
117
- self[k]
118
- }
119
- end
120
165
 
121
- def each(&block)
122
- @data.each do |key, value|
123
- block.call(key, follow(value))
124
- end
125
- end
166
+ def reorder(new_key_field, new_fields = nil, options = {})
167
+ options = Misc.add_defaults options
168
+ return TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => case_insensitive) if options[:persistence_file] and File.exists?(options[:persistence_file])
126
169
 
127
- def collect
128
- if block_given?
129
- @data.collect do |key, value|
130
- value = follow(value)
131
- key, values = yield key, value
132
- end
133
- else
134
- @data.collect do |key, value|
135
- [key, follow(value)]
170
+ new = {}
171
+ new_key_field, new_fields = through new_key_field, new_fields do |key, values|
172
+ if new[key].nil?
173
+ new[key] = values
174
+ else
175
+ new[key] = new[key].zip(values)
136
176
  end
137
177
  end
138
- end
139
178
 
140
- def sort(&block)
141
- collect.sort(&block).collect{|p|
142
- key, value = p
143
- value = NamedArray.name value, fields if fields
144
- [key, value]
145
- }
146
- end
179
+ new.each do |key,values|
180
+ values.each{|list| list.flatten! if Array === list}
181
+ end
147
182
 
148
- def sort_by(&block)
149
- collect.sort_by &block
150
- end
183
+ if options[:persistence_file]
184
+ reordered = TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => case_insensitive)
185
+ reordered.merge! new
186
+ else
187
+ reordered = TSV.new(new, :case_insensitive => case_insensitive)
188
+ end
151
189
 
152
- #{{{ Parsing
153
-
154
- def self.parse_fields(io, delimiter = "\t")
155
- return [] if io.nil?
156
- fields = io.split(delimiter, -1)
157
- fields
158
- end
190
+ reordered.key_field = new_key_field
191
+ reordered.fields = new_fields
159
192
 
160
- def self.zip_fields(list, fields = nil)
161
- return [] if list.nil? || list.empty?
162
- fields ||= list.fields if list.respond_to? :fields
163
- zipped = list[0].zip(*list[1..-1])
164
- zipped = zipped.collect{|v| NamedArray.name(v, fields)} if fields
165
- zipped
193
+ reordered
166
194
  end
167
-
168
- def self.parse(data, file, options = {})
169
195
 
170
- # Prepare options
171
- options = add_defaults options,
172
- :sep => "\t",
173
- :sep2 => "|",
174
- :native => 0,
175
- :extra => nil,
176
- :fix => nil,
177
- :exclude => nil,
178
- :select => nil,
179
- :grep => nil,
180
- :single => false,
181
- :unique => false,
182
- :flatten => false,
183
- :overwrite => false,
184
- :keep_empty => true,
185
- :case_insensitive => false,
186
- :header_hash => '#' ,
187
- :persistence_file => nil
196
+ def slice(new_fields, options = {})
197
+ reorder(:main, new_fields)
198
+ end
188
199
 
189
- options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
190
- options[:flatten] = true if options[:single]
200
+ def add_field(name = nil)
201
+ each do |key, values|
202
+ self[key] = values << yield(key, values)
203
+ end
191
204
 
205
+ fields << name if list
206
+ if PersistenceHash === @data
207
+ @data.fields = fields
208
+ end
209
+ end
192
210
 
211
+ def select(method)
212
+ new = TSV.new({})
213
+ new.key_field = key_field
214
+ new.fields = fields.dup
215
+
216
+ case
217
+ when Array === method
218
+ through do |key, values|
219
+ new[key] = values if ([key,values].flatten & method).any?
220
+ end
221
+ when Regexp === method
222
+ through do |key, values|
223
+ new[key] = values if [key,values].flatten.select{|v| v =~ method}.any?
224
+ end
225
+ when Hash === method
226
+ key = method.keys.first
227
+ method = method.values.first
228
+ case
229
+ when (Array === method and (:main == key or key_field == key))
230
+ method.each{|item| if values = self[item]; then new[item] = values; end}
231
+ when Array === method
232
+ through :main, key do |key, values|
233
+ new[key] = values if (values.flatten & method).any?
234
+ end
235
+ when Regexp === method
236
+ through :main, key do |key, values|
237
+ new[key] = values if values.flatten.select{|v| v =~ method}.any?
238
+ end
239
+ end
240
+ end
193
241
 
194
- #{{{ Process first line
242
+ new
243
+ end
195
244
 
196
- line = file.gets
197
- raise "Empty content" if line.nil?
198
- line.chomp!
245
+ def index(options = {})
246
+ options = Misc.add_defaults options, :order => false
199
247
 
200
- if line =~ /^#{options[:header_hash]}/
201
- header_fields = parse_fields(line, options[:sep])
202
- header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
203
- line = file.gets
204
- else
205
- header_fields = nil
248
+ if options[:persistence] and ! options[:persistence_file]
249
+ options[:persistence_file] = TSV.get_persistence_file(filename, "index:#{ filename }_#{options[:field]}:", options)
206
250
  end
207
-
208
- id_pos = Misc.field_position(header_fields, options[:native])
209
251
 
210
- if options[:extra].nil?
211
- extra_pos = nil
212
- max_cols = 0
213
- else
214
- extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
252
+ if options[:persistence_file] and File.exists?(options[:persistence_file])
253
+ return TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
215
254
  end
216
255
 
217
- #{{{ Process rest
218
- while line do
219
- line.chomp!
220
-
221
- line = options[:fix].call line if options[:fix]
256
+ new = {}
257
+ if options[:order]
258
+ new_key_field, new_fields = through options[:field], options[:others] do |key, values|
222
259
 
223
- # Select and fix lines
224
- if (options[:exclude] and options[:exclude].call(line)) or
225
- (options[:select] and not options[:select].call(line))
226
- line = file.gets
227
- next
228
- end
260
+ values.each_with_index do |list, i|
261
+ next if list.nil? or list.empty?
229
262
 
230
- ### Process line
263
+ list = [list] unless Array === list
231
264
 
232
- # Chunk fields
233
- parts = parse_fields(line, options[:sep])
234
-
235
- # Get next line
236
- line = file.gets
237
-
238
- # Get id field
239
- next if parts[id_pos].nil? || parts[id_pos].empty?
240
- ids = parse_fields(parts[id_pos], options[:sep2])
241
- ids.collect!{|id| id.downcase } if options[:case_insensitive]
242
-
243
- # Get extra fields
244
-
245
- if options[:extra].nil? and not (options[:flatten] or options[:single])
246
- extra = parts
247
- extra.delete_at(id_pos)
248
- max_cols = extra.size if extra.size > (max_cols || 0)
249
- else
250
- if extra_pos.nil?
251
- extra = parts
252
- extra.delete_at id_pos
253
- else
254
- extra = parts.values_at(*extra_pos)
255
- end
256
- end
257
-
258
- extra.collect!{|value| parse_fields(value, options[:sep2])}
259
- extra.collect!{|values| values.first} if options[:unique]
260
- extra.flatten! if options[:flatten]
261
- extra = extra.first if options[:single]
262
-
263
- if options[:overwrite]
264
- main_entry = ids.shift
265
- ids.each do |id|
266
- data[id] = "__Ref:#{main_entry}"
265
+ list.each do |value|
266
+ next if value.nil? or value.empty?
267
+ value = value.downcase if options[:case_insensitive]
268
+ new[value] ||= []
269
+ new[value][i + 1] ||= []
270
+ new[value][i + 1] << key
271
+ end
272
+ new[key] ||= []
273
+ new[key][0] = key
267
274
  end
268
275
 
269
- data[main_entry] = extra
270
- else
271
- main_entry = ids.shift
272
- ids.each do |id|
273
- data[id] = "__Ref:#{main_entry}"
274
- end
276
+ end
275
277
 
276
- case
277
- when (options[:single] or options[:unique])
278
- data[main_entry] ||= extra
279
- when options[:flatten]
280
- if PersistenceHash === data
281
- data[main_entry] = (data[main_entry] || []).concat extra
282
- else
283
- data[main_entry] ||= []
284
- data[main_entry].concat extra
285
- end
286
- else
287
- entry = data[main_entry] || []
288
- while entry =~ /__Ref:(.*)/ do
289
- entry = data[$1]
290
- end
278
+ new.each do |key, values|
279
+ values.flatten!
280
+ values.compact!
281
+ end
291
282
 
292
- extra.each_with_index do |fields, i|
293
- if fields.empty?
294
- next unless options[:keep_empty]
295
- fields = [""]
283
+ else
284
+ new_key_field, new_fields = through options[:field], options[:others] do |key, values|
285
+ new[key] ||= []
286
+ new[key] << key
287
+ values.each do |list|
288
+ next if list.nil?
289
+ if Array === list
290
+ list.each do |value|
291
+ value = value.downcase if options[:case_insensitive]
292
+ new[value] ||= []
293
+ new[value] << key
296
294
  end
297
- entry[i] ||= []
298
- entry[i] = entry[i].concat fields
295
+ else
296
+ next if list.empty?
297
+ value = list
298
+ value = value.downcase if options[:case_insensitive]
299
+ new[value] ||= []
300
+ new[value] << key
299
301
  end
300
-
301
- data[main_entry] = entry
302
- end
303
- end
304
- end
305
-
306
- if options[:keep_empty] and not max_cols.nil?
307
- data.each do |key,values|
308
- new_values = values
309
- max_cols.times do |i|
310
- new_values[i] ||= [""]
311
302
  end
312
- data[key] = new_values
313
303
  end
314
304
  end
315
305
 
316
-
317
- # Save header information
318
- key_field = nil
319
- fields = nil
320
- if header_fields && header_fields.any?
321
- key_field = header_fields[id_pos]
322
- if extra_pos.nil?
323
- fields = header_fields
324
- fields.delete_at(id_pos)
325
- else
326
- fields = header_fields.values_at(*extra_pos)
327
- end
306
+ if options[:persistence_file]
307
+ index = TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
308
+ index.merge! new
309
+ else
310
+ index = TSV.new(new, :case_insensitive => options[:case_insensitive])
328
311
  end
329
312
 
330
- data.read if PersistenceHash === data
331
-
332
- [key_field, fields]
313
+ index.key_field = new_key_field
314
+ index.fields = new_fields
315
+ index
333
316
  end
334
317
 
335
- attr_accessor :data, :key_field, :fields, :list, :case_insensitive, :filename
336
- def initialize(file = {}, options = {})
337
- @case_insensitive = options[:case_insensitive] == true
338
- @list = ! (options[:flatten] == true || options[:single] == true || options[:unique] == true)
339
-
340
- case
341
- when TSV === file
342
- @filename = file.filename
343
- @data = file.data
344
- @key_field = file.key_field
345
- @fields = file.fields
346
- @case_insensitive = file.case_insensitive
347
- @list = file.is_list
348
- return self
349
- when (Hash === file or PersistenceHash === file)
350
- @filename = "Hash:" + Digest::MD5.hexdigest(file.inspect)
351
- @data = file
352
- return self
353
- when File === file
354
- @filename = File.expand_path file.path
355
- when String === file && File.exists?(file)
356
- @filename = File.expand_path file
357
- file = Open.open(file)
358
- when StringIO
359
- else
360
- raise "File #{file} not found"
361
- end
318
+ def smart_merge(other, match = nil, new_fields = nil)
362
319
 
363
- if options[:persistence]
364
- options.delete :persistence
365
- persistence_file = TSV.get_persistence_file @filename, "file:#{ @filename }:", options
320
+ new_fields = [new_fields] if String === new_fields
321
+ if self.fields and other.fields
322
+ common_fields = ([self.key_field] + self.fields) & ([other.key_field] + other.fields)
323
+ new_fields ||= ([other.key_field] + other.fields) - ([self.key_field] + self.fields)
366
324
 
367
- if File.exists? persistence_file
368
- TSV.log "Loading Persistence for #{ @filename } in #{persistence_file}"
369
- @data = PersistenceHash.get(persistence_file, false)
370
- @key_field = @data.key_field
371
- @fields = @data.fields
372
- else
373
- @data = PersistenceHash.get(persistence_file, true)
374
- file = Open.grep(file, options[:grep]) if options[:grep]
325
+ common_fields.delete match if String === match
326
+ common_fields.delete_at match if Integer === match
375
327
 
376
- TSV.log "Persistent Parsing for #{ @filename } in #{persistence_file}"
377
- @key_field, @fields = TSV.parse(@data, file, options.merge(:persistence_file => persistence_file))
378
- @data.key_field = @key_field
379
- @data.fields = @fields
380
- @data.read
381
- end
328
+ this_common_field_positions = self.field_positions *common_fields
329
+ other_common_field_positions = other.field_positions *common_fields
330
+ other_new_field_positions = other.field_positions *new_fields
382
331
  else
383
- TSV.log "Non-persistent parsing for #{ @filename }"
384
- @data = {}
385
- file = Open.grep(file, options[:grep]) if options[:grep]
386
- @key_field, @fields = TSV.parse(@data, file, options)
332
+ nofieldinfo = true
387
333
  end
388
334
 
389
- file.close
390
- @case_insensitive = options[:case_insensitive] == true
391
- end
335
+ case
336
+ when TSV === match
337
+ match_index = match
338
+ matching_code_position = nil
392
339
 
340
+ when Array === match
341
+ match_index = match.first
342
+ matching_code_position = field_positions(match.last).first
393
343
 
394
- def to_s
395
- str = ""
344
+ when match =~ /^through:(.*)/
345
+ through = $1
346
+ if through =~ /(.*)#using:(.*)/
347
+ through = $1
348
+ matching_code_position = field_positions($2).first
349
+ else
350
+ matching_code_position = nil
351
+ end
352
+ index_fields = TSV.headers(through)
353
+ target_field = index_fields.select{|field| other.fields_include field}.first
354
+ Log.debug "Target Field: #{ target_field }"
355
+ match_index = TSV.open_file(through).index(:field => target_field)
396
356
 
397
- if fields
398
- str << "#" << key_field << "\t" << fields * "\t" << "\n"
357
+ when field_positions(match).first
358
+ matching_code_position = field_positions(match).first
359
+ match_index = nil
399
360
  end
400
361
 
401
- each do |key, values|
402
- case
403
- when values.nil?
404
- str << key.dup << "\n"
405
- when (not Array === values)
406
- str << key.dup << "\t" << values.to_s << "\n"
407
- when Array === values.first
408
- str << key.dup << "\t" << values.collect{|list| (list || []) * "|"} * "\t" << "\n"
409
- else
410
- str << key.dup << "\t" << values * "\t" << "\n"
362
+ if matching_code_position.nil? and match_index.fields
363
+ match_index.fields.each do |field|
364
+ if matching_code_position = field_positions(field).first
365
+ break
366
+ end
411
367
  end
412
368
  end
413
369
 
414
- str
415
- end
416
-
417
- #{{{ New
418
-
419
- def self.fields_include(key_field, fields, field)
420
- return true if field == key_field or fields.include? field
421
- return false
422
- end
370
+ if match_index and match_index.key_field == other.key_field
371
+ other_index = nil
372
+ else
373
+ other_index = (match === String and other.fields_include(match)) ?
374
+ other.index(:other => match, :order => true) : other.index(:order => true)
375
+ end
423
376
 
424
- def self.field_positions(key_field, fields, *selected)
425
- selected.collect do |sel|
426
- case
427
- when (sel.nil? or sel == :main or sel == key_field)
428
- -1
429
- when Integer === sel
430
- sel
377
+ each do |key,values|
378
+ Log.debug "Key: #{ key }. Values: #{values * ", "}"
379
+ if matching_code_position.nil? or matching_code_position == -1
380
+ matching_codes = [key]
431
381
  else
432
- Misc.field_position fields, sel
382
+ matching_codes = values[matching_code_position]
383
+ matching_codes = [matching_codes] unless matching_codes.nil? or Array === matching_codes
433
384
  end
434
- end
435
- end
436
-
437
- def fields_include(field)
438
- return TSV.fields_include key_field, fields, field
439
- end
440
-
441
- def field_positions(*selected)
442
- return nil if selected.nil? or selected == [nil]
443
- TSV.field_positions(key_field, fields, *selected)
444
- end
385
+ Log.debug "Matching codes: #{matching_codes}"
445
386
 
446
- def fields_at(*positions)
447
- return nil if fields.nil?
448
- return nil if positions.nil? or positions == [nil]
449
- (fields + [key_field]).values_at(*positions)
450
- end
387
+ next if matching_codes.nil?
451
388
 
452
- def through(new_key_field = nil, new_fields = nil, &block)
453
- new_key_position = (field_positions(new_key_field) || [-1]).first
389
+ matching_codes.each do |matching_code|
390
+ if match_index
391
+ if match_index[matching_code]
392
+ matching_code_fix = match_index[matching_code].first
393
+ else
394
+ matching_code_fix = nil
395
+ end
396
+ else
397
+ matching_code_fix = matching_code
398
+ end
454
399
 
455
- if new_key_position == -1
400
+ Log.debug "Matching code (fix): #{matching_code_fix}"
401
+ next if matching_code_fix.nil?
456
402
 
457
- if new_fields.nil? or new_fields == fields
458
- each &block
459
- return [key_field, fields]
460
- else
461
- new_field_positions = field_positions(*new_fields)
462
- each do |key, values|
463
- yield key, values.values_at(*new_field_positions)
403
+ if other_index
404
+ Log.debug "Using other_index"
405
+ other_codes = other_index[matching_code_fix]
406
+ else
407
+ other_codes = matching_code_fix
464
408
  end
465
- return [key_field, fields_at(*new_field_positions)]
466
- end
409
+ Log.debug "Other codes: #{other_codes}"
467
410
 
468
- else
469
- new_field_positions = field_positions(*new_fields)
411
+ next if other_codes.nil? or other_codes.empty?
412
+ other_code = other_codes.first
470
413
 
471
- new_field_names = fields_at(*new_field_positions)
472
- if new_field_names.nil? and fields
473
- new_field_names = fields.dup
474
- new_field_names.delete_at new_key_position
475
- new_field_names.unshift key_field
476
- end
414
+ if nofieldinfo
415
+ next if other[other_code].nil?
416
+ if list
417
+ other_values = [[other_code]] + other[other_code]
418
+ else
419
+ other_values = [other_code] + other[other_code]
420
+ end
421
+ other_values.delete_if do |list|
422
+ list = [list] unless Array === list
423
+ list.collect{|e| case_insensitive ? e.downcase : e }.
424
+ select{|e| case_insensitive ? e == matching_code.downcase : e == matching_code }.any?
425
+ end
477
426
 
478
- each do |key, values|
479
- if list
480
- tmp_values = values + [[key]]
427
+ new_values = values + other_values
481
428
  else
482
- tmp_values = values + [key]
483
- end
429
+ if other[other_code].nil?
430
+ if list
431
+ other_values = [[]] * other.fields.length
432
+ else
433
+ other_values = [] * other.fields.length
434
+ end
435
+ else
436
+ if list
437
+ other_values = other[other_code] + [[other_code]]
438
+ else
439
+ other_values = other[other_code] + [other_code]
440
+ end
441
+ end
442
+
484
443
 
485
- if new_field_positions.nil?
486
444
  new_values = values.dup
487
- new_values.delete_at new_key_position
488
- new_values.unshift [key]
489
- else
490
- new_values = tmp_values.values_at(*new_field_positions)
491
- end
492
445
 
493
- tmp_values[new_key_position].each do |new_key|
494
- if new_field_names
495
- yield new_key, NamedArray.name(new_values, new_field_names)
496
- else
497
- yield new_key, new_values
446
+ if list
447
+ this_common_field_positions.zip(other_common_field_positions).each do |tpos, opos|
448
+ new_values_tops = new_values[tpos]
449
+
450
+ if other.list
451
+ new_values_tops += other_values[opos]
452
+ else
453
+ new_values_tops += [other_values[opos]]
454
+ end
455
+
456
+ new_values[tpos] = new_values_tops.uniq
457
+ end
498
458
  end
459
+
460
+ new_values.concat other_values.values_at *other_new_field_positions
499
461
  end
462
+
463
+ self[key] = new_values
500
464
  end
501
- return [(fields_at(new_key_position) || [nil]).first, new_field_names]
502
465
  end
466
+
467
+ self.fields = self.fields + new_fields unless nofieldinfo
468
+ end
469
+
470
+ #{{{ Helpers
471
+
472
# Builds an index for the TSV in +file+, or reloads one that was already
# persisted.
#
# +options+ is split in two copies: +opt_data+ (without :field and
# :persistence) is used to load the data, and +opt_index+ (without
# :persistence) is handed to the index. :data_persistence turns on
# persistence for the data itself; :persistence makes the index use a
# persistence file obtained from +get_persistence_file+.
#
# Returns a TSV: either one wrapping the existing persistence file, or the
# result of calling +index+ on the freshly loaded data.
def self.index(file, options = {})
  opt_data = options.dup
  opt_index = options.dup
  opt_data.delete :field
  opt_data.delete :persistence
  opt_index.delete :persistence

  opt_data[:persistence] = true if options[:data_persistence]

  opt_index.merge! :persistence_file => get_persistence_file(file, "index:#{ file }_#{options[:field]}:", opt_index) if options[:persistence]

  # File.exist? is used because File.exists? no longer exists in Ruby >= 3.2
  if ! opt_index[:persistence_file].nil? && File.exist?(opt_index[:persistence_file])
    Log.low "Reloading persistent index for #{ file }: #{opt_index[:persistence_file]}"
    TSV.new(PersistenceHash.get(opt_index[:persistence_file], false), opt_index)
  else
    Log.low "Creating index for #{ file }: #{opt_index[:persistence_file]}"
    data = TSV.new(file, opt_data)
    data.index(opt_index)
  end
end
492
+
493
# Opens a TSV from a "path#options" specification.
#
# When +file+ contains a '#', the text before the first '#' is the file and
# the rest is converted to an options hash with Misc.string2hash; otherwise
# no options are used.
def self.open_file(file)
  options = {}

  if file =~ /(.*?)#(.*)/
    spec = Regexp.last_match
    file = spec[1]
    options = Misc.string2hash(spec[2].to_s)
  end

  TSV.new(file, options)
end
504
-
505
- def process(field)
506
- through do |key, values|
507
- values[field].replace yield(values[field], key, values) unless values[field].nil?
508
- end
502
+
503
+ #{{{ Accessor Methods
504
+
505
# Keys of the underlying data store, exactly as the store reports them.
def keys
  @data.keys
end
510
508
 
509
# Values of the underlying data store, as stored (references are not
# followed here).
def values
  @data.values
end
511
512
 
512
- def reorder(new_key_field, new_fields = nil, options = {})
513
- options = Misc.add_defaults options
514
- return TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => case_insensitive) if options[:persistence_file] and File.exists?(options[:persistence_file])
513
# Number of entries reported by the underlying data store.
def size
  @data.size
end
515
516
 
516
- new = {}
517
- new_key_field, new_fields = through new_key_field, new_fields do |key, values|
518
- if new[key].nil?
519
- new[key] = values
520
- else
521
- new[key] = new[key].zip(values)
522
- end
523
- end
517
+ # Write
524
518
 
525
- new.each do |key,values|
526
- values.each{|list| list.flatten! if Array === list}
519
# Stores +value+ under +key+; the key is downcased first when the TSV is
# case insensitive.
def []=(key, value)
  stored_key = @case_insensitive ? key.downcase : key
  @data[stored_key] = value
end
523
+
524
+
525
# Copies every key/value pair of +new_data+ into this TSV through #[]=,
# so the usual key handling of #[]= applies to each pair.
def merge!(new_data)
  new_data.each { |key, value| self[key] = value }
end
528
530
 
529
- if options[:persistence_file]
530
- reordered = TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => case_insensitive)
531
- reordered.merge! new
531
+ # Read
532
+
533
# Resolves a stored value.
#
# A String matching "__Ref:<key>" is an alias: the entry for <key> is
# looked up and returned instead. An Array is wrapped with NamedArray.name
# when field names are available. Anything else is returned untouched.
def follow(value)
  return self[$1] if String === value && value =~ /__Ref:(.*)/
  return NamedArray.name(value, fields) if Array === value && fields

  value
end
535
541
 
536
- reordered.key_field = new_key_field
537
- reordered.fields = new_fields
542
# Looks up +key+.
#
# An Array key is first tried as a literal key of the store (that hit is
# returned as stored); otherwise each element is looked up in turn and the
# first non-nil result wins, or nil when none matches. Any other key is
# downcased when the TSV is case insensitive and the stored value is
# passed through #follow.
def [](key)
  unless Array === key
    lookup = @case_insensitive ? key.downcase : key
    return follow(@data[lookup])
  end

  literal = @data[key]
  return literal unless literal.nil?

  key.each do |candidate|
    found = self[candidate]
    return found unless found.nil?
  end

  nil
end
541
552
 
542
- def slice(new_fields, options = {})
543
- reorder(:main, new_fields)
553
# Returns the result of #[] for each of the given keys, in order.
def values_at(*keys)
  keys.map { |key| self[key] }
end
545
558
 
546
- def index(options = {})
547
- options = Misc.add_defaults options, :order => false
548
-
549
- if options[:persistence] and ! options[:persistence_file]
550
- options[:persistence_file] = TSV.get_persistence_file(filename, "index:#{ filename }_#{options[:field]}:", options)
559
# Calls +block+ with every key and its value after passing the value
# through #follow.
def each(&block)
  @data.each { |key, stored| block.call(key, follow(stored)) }
end
552
564
 
553
- if options[:persistence_file] and File.exists?(options[:persistence_file])
554
- return TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
565
# Maps over the entries with values passed through #follow.
#
# With a block, returns the block results for each (key, value); without
# one, returns the [key, value] pairs.
def collect
  unless block_given?
    return @data.collect { |key, stored| [key, follow(stored)] }
  end

  @data.collect do |key, stored|
    yield key, follow(stored)
  end
end
556
577
 
557
- new = {}
558
- if options[:order]
559
- new_key_field, new_fields = through options[:field], options[:others] do |key, values|
560
-
561
- values.each_with_index do |list, i|
562
- next if list.nil? or list.empty?
563
-
564
- list = [list] unless Array === list
565
-
566
- list.each do |value|
567
- next if value.nil? or value.empty?
568
- value = value.downcase if options[:case_insensitive]
569
- new[value] ||= []
570
- new[value][i + 1] ||= []
571
- new[value][i + 1] << key
572
- end
573
- new[key] ||= []
574
- new[key][0] = key
575
- end
578
# Sorts the [key, value] pairs produced by #collect (optionally with a
# comparison block) and, when field names are available, re-wraps each
# value with NamedArray.name.
def sort(&block)
  ordered = collect.sort(&block)

  ordered.collect do |(key, value)|
    value = NamedArray.name(value, fields) if fields
    [key, value]
  end
end
576
585
 
577
- end
586
# Sorts the [key, value] pairs produced by #collect using +block+ to
# compute the sort key.
def sort_by(&block)
  collect.sort_by(&block)
end
578
589
 
579
- new.each do |key, values|
580
- values.flatten!
581
- values.compact!
582
- end
590
# Renders the TSV as tab-separated text.
#
# When field names are present a "#key_field<TAB>fields..." header line is
# written first. Each entry becomes one line: the key alone for nil values,
# key and value.to_s for non-Array values, lists joined with "|" per column
# when the first element is an Array, and plain tab-joined values otherwise.
def to_s
  str = ""

  str << "#" << key_field << "\t" << fields * "\t" << "\n" if fields

  each do |key, values|
    str << key.dup

    unless values.nil?
      cells =
        if !(Array === values)
          values.to_s
        elsif Array === values.first
          values.collect { |list| (list || []) * "|" } * "\t"
        else
          values * "\t"
        end
      str << "\t" << cells
    end

    str << "\n"
  end

  str
end
618
612
 
619
- def smart_merge(other, match = nil, new_fields = nil)
620
-
621
- new_fields = [new_fields] if String === new_fields
622
- if self.fields and other.fields
623
- common_fields = ([self.key_field] + self.fields) & ([other.key_field] + other.fields)
624
- new_fields ||= ([other.key_field] + other.fields) - ([self.key_field] + self.fields)
613
+ #{{{ Parsing
614
+
615
# Splits +io+ (a String, despite the name) on +delimiter+, keeping
# trailing empty fields. Returns an empty Array for nil.
def self.parse_fields(io, delimiter = "\t")
  io.nil? ? [] : io.split(delimiter, -1)
end
625
620
 
626
- common_fields.delete match if String === match
627
- common_fields.delete_at match if Integer === match
621
# Transposes a list of per-field value lists into per-position tuples.
#
# Returns [] for a nil or empty +list+. Field names come from +fields+ or,
# failing that, from list.fields when +list+ responds to it; when names are
# available each tuple is wrapped with NamedArray.name.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  fields ||= list.fields if list.respond_to? :fields

  tuples = list[0].zip(*list[1..-1])
  return tuples unless fields

  tuples.collect { |tuple| NamedArray.name(tuple, fields) }
end
628
+
629
# Parses tabular text from +file+ (anything answering #gets) into +data+
# (a Hash-like store) and returns [key_field, fields] taken from the header
# line, or [nil, nil] when there is no header.
#
# The first line is treated as a header when it starts with
# options[:header_hash]. Every other line is split on :sep; the :native
# column provides the ids (split on :sep2). The first id is the main entry
# and the remaining ids are stored as "__Ref:<main>" aliases. The other
# columns (all of them, or those listed in :extra) are split on :sep2 and
# stored according to :single / :unique / :flatten / :overwrite; by default
# repeated ids have their per-column lists concatenated.
#
# Raises RuntimeError "Empty content" when +file+ yields no line at all.
def self.parse(data, file, options = {})

  # Prepare options
  # NOTE(review): add_defaults is called unqualified here while #initialize
  # uses Misc.add_defaults -- confirm both resolve to the same helper.
  options = add_defaults options,
    :sep => "\t",
    :sep2 => "|",
    :native => 0,
    :extra => nil,
    :fix => nil,
    :exclude => nil,
    :select => nil,
    :grep => nil,
    :single => false,
    :unique => false,
    :flatten => false,
    :overwrite => false,
    :keep_empty => true,
    :case_insensitive => false,
    :header_hash => '#' ,
    :persistence_file => nil

  options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
  options[:flatten] = true if options[:single]

  #{{{ Process first line

  line = file.gets
  raise "Empty content" if line.nil?
  line.chomp!

  # Literal prefix test: the header marker is a plain string, so it must not
  # be interpreted as a regular expression.
  if line.start_with?(options[:header_hash])
    header_fields = parse_fields(line, options[:sep])
    header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
    line = file.gets
  else
    header_fields = nil
  end

  id_pos = Misc.field_position(header_fields, options[:native])

  # max_cols is only tracked when every non-id column is kept; it stays nil
  # when an explicit :extra selection is given.
  max_cols = nil
  if options[:extra].nil?
    extra_pos = nil
    max_cols = 0
  else
    extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
  end

  #{{{ Process rest
  while line do
    line.chomp!

    line = options[:fix].call line if options[:fix]

    # Select and fix lines
    if (options[:exclude] and options[:exclude].call(line)) or
      (options[:select] and not options[:select].call(line))
      line = file.gets
      next
    end

    ### Process line

    # Chunk fields
    parts = parse_fields(line, options[:sep])

    # Get next line (done here so that the `next` statements below advance)
    line = file.gets

    # Get id field
    next if parts[id_pos].nil? || parts[id_pos].empty?
    ids = parse_fields(parts[id_pos], options[:sep2])
    ids.collect!{|id| id.downcase } if options[:case_insensitive]

    # Get extra fields

    if options[:extra].nil? and not (options[:flatten] or options[:single])
      extra = parts
      extra.delete_at(id_pos)
      max_cols = extra.size if extra.size > (max_cols || 0)
    else
      if extra_pos.nil?
        extra = parts
        extra.delete_at id_pos
      else
        extra = parts.values_at(*extra_pos)
      end
    end

    extra.collect!{|value| parse_fields(value, options[:sep2])}
    extra.collect!{|values| values.first} if options[:unique]
    extra.flatten! if options[:flatten]
    extra = extra.first if options[:single]

    if options[:overwrite]
      main_entry = ids.shift
      ids.each do |id|
        data[id] = "__Ref:#{main_entry}"
      end

      data[main_entry] = extra
    else
      main_entry = ids.shift
      ids.each do |id|
        data[id] = "__Ref:#{main_entry}"
      end

      case
      when (options[:single] or options[:unique])
        data[main_entry] ||= extra
      when options[:flatten]
        if PersistenceHash === data
          data[main_entry] = (data[main_entry] || []).concat extra
        else
          data[main_entry] ||= []
          data[main_entry].concat extra
        end
      else
        entry = data[main_entry] || []
        # Only Strings can be aliases; guarding on the class avoids calling
        # =~ on an Array, which raises NoMethodError on Ruby >= 3.2.
        while String === entry && entry =~ /__Ref:(.*)/ do
          entry = data[$1]
        end

        extra.each_with_index do |fields, i|
          if fields.empty?
            next unless options[:keep_empty]
            fields = [""]
          end
          entry[i] ||= []
          entry[i] = entry[i].concat fields
        end

        data[main_entry] = entry
      end
    end
  end

  # Pad short rows so that every entry has max_cols columns
  if options[:keep_empty] and not max_cols.nil?
    data.each do |key,values|
      # "__Ref:<key>" aliases are Strings, not rows: indexing into them
      # would misread characters as columns and can raise TypeError.
      next if String === values
      new_values = values
      max_cols.times do |i|
        new_values[i] ||= [""]
      end
      data[key] = new_values
    end
  end


  # Save header information
  key_field = nil
  fields = nil
  if header_fields && header_fields.any?
    key_field = header_fields[id_pos]
    if extra_pos.nil?
      fields = header_fields
      fields.delete_at(id_pos)
    else
      fields = header_fields.values_at(*extra_pos)
    end
  end

  data.read if PersistenceHash === data

  [key_field, fields]
end
781
795
 
782
- opt_index.merge! :persistence_file => get_persistence_file(file, "index:#{ file }_#{options[:field]}:", opt_index) if options[:persistence]
796
+ attr_accessor :data, :key_field, :fields, :list, :case_insensitive, :filename
797
# Field names of the TSV. When they are held in an Array, every name is
# extended with the Field module before being returned.
def fields
  @fields.each { |name| name.extend(Field) } if Array === @fields
  @fields
end
783
802
 
784
- if ! opt_index[:persistence_file].nil? && File.exists?(opt_index[:persistence_file])
785
- TSV.log "Reloading persistent index for #{ file }: #{opt_index[:persistence_file]}"
786
- TSV.new(PersistenceHash.get(opt_index[:persistence_file], false), opt_index)
787
- else
788
- TSV.log "Creating index for #{ file }: #{opt_index[:persistence_file]}"
789
- data = TSV.new(file, opt_data)
790
- data.index(opt_index)
803
# Builds a TSV from one of several sources:
#
# * another TSV: shares (or, with :persistence, copies into a TCHash) its
#   data and copies key_field, fields, case_insensitive and list;
# * a Hash: wraps it (or copies it into a TCHash with :persistence);
# * a PersistenceHash: wraps it and takes key_field and fields from it;
# * a File, the path of an existing file (optionally followed by
#   "#options"), or any stream answering #gets: the content is parsed with
#   TSV.parse, optionally filtered through Open.grep (:grep) and optionally
#   backed by a persistence file (:persistence / :persistence_file).
#
# Raises RuntimeError "File ... not found" when +file+ is none of the above
# (for instance a path that does not exist).
def initialize(file = {}, options = {})
  options = Misc.add_defaults options
  options[:persistence] = true if options[:persistence_file]

  if String === file && file =~ /(.*?)#(.*)/
    file, file_options = $1, $2
    # NOTE(review): file_options is the raw text after '#'; this relies on
    # Misc.add_defaults accepting a String -- confirm (TSV.open_file
    # converts it with Misc.string2hash first).
    options = Misc.add_defaults file_options, options
  end

  @case_insensitive = options[:case_insensitive] == true
  @list = ! (options[:flatten] == true || options[:single] == true || options[:unique] == true)

  case
  when TSV === file
    Log.low "Copying TSV"
    @filename = file.filename

    if options[:persistence] and not PersistenceHash === file.data
      persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
      Log.low "Making persistance #{ persistence_file }"
      @data = TCHash.get(persistence_file)
      @data.merge! file
      @data.key_field = file.key_field
      @data.fields = file.fields
    else
      @data = file.data
    end

    @key_field = file.key_field
    @fields = file.fields
    @case_insensitive = file.case_insensitive
    @list = file.list
    return self
  when Hash === file
    Log.low "Encapsulating Hash"
    @filename = "Hash:" + Digest::MD5.hexdigest(file.inspect)
    if options[:persistence]
      persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
      Log.low "Making persistance #{ persistence_file }"
      @data = TCHash.get(persistence_file)
      @data.merge! file
    else
      @data = file
    end
    return self
  when PersistenceHash === file
    Log.low "Encapsulating PersistenceHash"
    @filename = "PersistenceHash:" + Digest::MD5.hexdigest(file.inspect)
    @data = file
    @key_field = file.key_field
    @fields = file.fields
    return self
  when File === file
    @filename = File.expand_path file.path
  when String === file && File.exist?(file)
    @filename = File.expand_path file
    file = Open.open(file)
  when file.respond_to?(:gets)
    # StringIO and other already-open streams are parsed as they are; no
    # filename is recorded for them. (A bare `when StringIO` would match
    # every value and make the error branch below unreachable.)
  else
    raise "File #{file} not found"
  end

  if options[:persistence]
    options.delete :persistence
    persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)

    if File.exist? persistence_file
      Log.low "Loading Persistence for #{ @filename } in #{persistence_file}"
      @data = PersistenceHash.get(persistence_file, false)
      @key_field = @data.key_field
      @fields = @data.fields
    else
      @data = PersistenceHash.get(persistence_file, true)
      file = Open.grep(file, options[:grep]) if options[:grep]

      Log.low "Persistent Parsing for #{ @filename } in #{persistence_file}"
      @key_field, @fields = TSV.parse(@data, file, options.merge(:persistence_file => persistence_file))
      @data.key_field = @key_field
      @data.fields = @fields
      @data.read
    end
  else
    Log.low "Non-persistent parsing for #{ @filename }"
    @data = {}
    file = Open.grep(file, options[:grep]) if options[:grep]
    @key_field, @fields = TSV.parse(@data, file, options)
  end

  file.close
  @case_insensitive = options[:case_insensitive] == true
end
803
894
 
804
895
  end
896
+
897
+ #{{{ CacheHelper
898
+ require 'rbbt/util/cachehelper'
899
module CacheHelper
  # Returns a TSV cached under +name+ and +key+.
  #
  # The cache file name comes from CacheHelper.build_filename. When that
  # file exists it is opened with TCHash.get and wrapped in a TSV; otherwise
  # the block is called to produce the data, which is wrapped in a TSV that
  # uses the cache file as its persistence file.
  def self.tsv_cache(name, key = [])
    cache_file = CacheHelper.build_filename name, key

    # File.exist? is used because File.exists? no longer exists in Ruby >= 3.2
    if File.exist? cache_file
      Log.debug "TSV cache file '#{cache_file}' found"
      hash = TCHash.get(cache_file)
      TSV.new(hash)
    else
      Log.debug "Producing TSV cache file '#{cache_file}'"
      data = yield
      TSV.new(data, :persistence_file => cache_file)
    end
  end
end