rbbt-util 1.2.1 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
data/lib/rbbt/util/pkg_data.rb CHANGED
@@ -1,58 +1,13 @@
1
1
  require 'rbbt/util/open'
2
2
  require 'rbbt/util/tsv'
3
3
  require 'rbbt/util/log'
4
+ require 'rbbt/util/path'
4
5
  require 'rbbt/util/rake'
5
6
 
6
7
  module PKGData
7
8
  attr_accessor :claims
8
- def self.extended(base)
9
- base.claims = {}
10
- end
11
-
12
- module Path
13
- attr_accessor :base
14
-
15
- def method_missing(name, *args, &block)
16
- new = File.join(self.dup, name.to_s)
17
- new.extend Path
18
- new.base = base
19
- new
20
- end
21
-
22
- def [](name)
23
- new = File.join(self.dup, name.to_s)
24
- new.extend Path
25
- new.base = base
26
- new
27
- end
28
-
29
- def tsv(options = {})
30
- produce
31
- TSV.new self, options
32
- end
33
-
34
- def index(field = nil, other = nil, options = {})
35
- produce
36
- TSV.index self, options.merge(:target => field, :others => other)
37
- end
38
-
39
- def open
40
- produce
41
- Open.open(self)
42
- end
43
-
44
- def read
45
- produce
46
- Open.read(self)
47
- end
48
-
49
- def produce
50
- return if File.exists? self
51
-
52
- Log.debug("Trying to produce '#{ self }'")
53
- file, producer = base.reclaim self
54
- base.produce(self, producer[:get], producer[:subdir], producer[:sharedir])
55
- end
9
+ def self.extended(pkg_module)
10
+ pkg_module.claims = {}
56
11
  end
57
12
 
58
13
  class SharedirNotFoundError < StandardError; end
@@ -84,19 +39,18 @@ module PKGData
84
39
 
85
40
  def files
86
41
  path = datadir.dup.extend Path
87
- path.base = self
42
+ path.pkg_module = self
43
+ path.datadir = datadir
88
44
  path
89
45
  end
90
46
 
91
47
  def in_datadir?(file)
92
- if File.expand_path(file.to_s) =~ /^#{Regexp.quote File.expand_path(datadir)}/
93
- true
94
- else
95
- false
96
- end
48
+ Misc.in_directory? file, datadir
97
49
  end
98
50
 
99
- def claim(file, get = nil, subdir = nil, sharedir = nil)
51
+ # file is the complete path of the file inside the datadir
52
+ # get is the get method. :Rakefile for
53
+ def claim(file, get = nil, subdir = nil, namespace = nil, sharedir = nil)
100
54
  file = case
101
55
  when (file.nil? or file === :all)
102
56
  File.join(datadir, subdir.to_s)
@@ -107,7 +61,7 @@ module PKGData
107
61
  end
108
62
 
109
63
  sharedir ||= PKGData.get_caller_sharedir
110
- claims[file] = {:get => get, :subdir => subdir, :sharedir => sharedir}
64
+ claims[file] = {:get => get, :subdir => subdir, :sharedir => sharedir, :namespace => namespace}
111
65
  produce(file, get, subdir, sharedir) if TSV === get
112
66
  produce(file, get, subdir, sharedir) if String === get and not File.exists?(get) and reclaim(file).nil? and not File.basename(get.to_s) == "Rakefile"
113
67
  end
@@ -139,9 +93,12 @@ module PKGData
139
93
 
140
94
  FileUtils.mkdir_p File.dirname(file) unless File.exists?(File.dirname(file))
141
95
 
96
+ relative_path = Misc.path_relative_to file, datadir
142
97
  case
143
98
  when get.nil?
144
- FileUtils.cp File.join(sharedir, subdir.to_s, File.basename(file.to_s)), file.to_s
99
+ FileUtils.cp File.join(sharedir, relative_path), file.to_s
100
+ when StringIO === get
101
+ Open.write(file, get.read)
145
102
  when Proc === get
146
103
  Open.write(file, get.call)
147
104
  when TSV === get
@@ -153,10 +110,10 @@ module PKGData
153
110
  rakefile = File.join(sharedir, get.to_s)
154
111
  end
155
112
  produce_with_rake(rakefile, subdir, file)
156
- when String === get
113
+ when (String === get and Open.remote? get)
157
114
  Open.write(file, Open.read(get, :wget_options => {:pipe => true}, :nocache => true))
158
115
  else
159
- raise "Unknown Get: #{get.class}"
116
+ raise "Unknown Get: #{get.class} #{get}"
160
117
  end
161
118
  end
162
119
  end
data/lib/rbbt/util/pkg_software.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/open'
2
+ require 'rbbt/util/misc'
2
3
  require 'rbbt/util/tsv'
3
4
  require 'rbbt/util/log'
4
5
  require 'rbbt/util/cmd'
@@ -52,7 +53,20 @@ module PKGSoftware
52
53
 
53
54
  FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
54
55
 
55
- if get.nil? or get.empty?
56
+ case
57
+ when get == :directory
58
+ FileUtils.mkdir_p File.dirname(path) unless File.exists? File.dirname(path)
59
+ subdir = Misc.path_relative_to File.dirname(path), opt_dir
60
+ source = File.join(sharedir, 'install/software', subdir, pkg)
61
+
62
+ FileUtils.cp_r File.join(sharedir, 'install/software', subdir, pkg), path
63
+ when get == :binary
64
+ FileUtils.mkdir_p File.dirname(path) unless File.exists? File.dirname(path)
65
+ subdir = Misc.path_relative_to File.dirname(path), opt_dir
66
+ source = File.join(sharedir, 'install/software', subdir, pkg)
67
+
68
+ FileUtils.cp File.join(sharedir, 'install/software', subdir, pkg), path
69
+ when (get.nil? or get.empty?)
56
70
  CMD.cmd("#{File.join(sharedir, 'install', 'software', pkg)} #{File.join(Rbbt.rootdir, 'share/install/software/lib', 'install_helpers')} #{software_dir}", :stderr => Log::HIGH)
57
71
  else
58
72
  CMD.cmd("#{File.join(sharedir, 'install', 'software', get)} #{File.join(Rbbt.rootdir, 'share/install/software/lib', 'install_helpers')} #{software_dir}")
data/lib/rbbt/util/rake.rb CHANGED
@@ -17,7 +17,11 @@ module RakeHelper
17
17
  end
18
18
  def self.define_task(file, *args, &block)
19
19
  @@files ||= []
20
- @@files << file
20
+ if Hash === file
21
+ @@files << file.keys.first.to_s
22
+ else
23
+ @@files << file.to_s
24
+ end
21
25
  old_define_task(file, *args, &block)
22
26
  end
23
27
 
data/lib/rbbt/util/tc_hash.rb CHANGED
@@ -1,39 +1,143 @@
1
+ require 'rbbt/util/misc'
1
2
  require 'tokyocabinet'
2
3
 
3
4
  class TCHash < TokyoCabinet::HDB
4
5
  class OpenError < StandardError;end
5
6
  class KeyFormatError < StandardError;end
6
7
 
7
- Serializer = Marshal
8
+ class StringSerializer
9
+ def self.dump(str); str.to_s; end
10
+ def self.load(str); str; end
11
+ end
12
+
13
+ class StringArraySerializer
14
+ def self.dump(array)
15
+ array.collect{|a| a.to_s} * "\t"
16
+ end
17
+
18
+ def self.load(string)
19
+ string.split(/\t/)
20
+ end
21
+ end
22
+
23
+ class StringDoubleArraySerializer
24
+ def self.dump(array)
25
+ array.collect{|a| a.collect{|a| a.to_s} * "|"} * "\t"
26
+ end
27
+
28
+ def self.load(string)
29
+ string.split(/\t/).collect{|l| l.split("|")}
30
+ end
31
+ end
32
+
33
+
34
+ ALIAS = {:marshal => Marshal, nil => Marshal, :single => StringSerializer, :list => StringArraySerializer, :double => StringDoubleArraySerializer}
35
+
36
+ CONNECTIONS = {}
8
37
 
9
38
  FIELD_INFO_ENTRIES = {
10
- :fields => '__tokyocabinet_hash_fields',
11
- :key_field => '__tokyocabinet_hash_key_field',
12
- :filename => '__tokyocabinet_hash_filename',
13
- :type => '__tokyocabinet_hash_type',
14
- :case_insensitive => '__tokyocabinet_hash_case_insensitive'
39
+ :type => '__tokyocabinet_hash_type',
40
+ :serializer => '__tokyocabinet_hash_serializer',
41
+ :identifiers => '__tokyocabinet_hash_identifiers',
42
+ :datadir => '__tokyocabinet_hash_datadir',
43
+ :fields => '__tokyocabinet_hash_fields',
44
+ :key_field => '__tokyocabinet_hash_key_field',
45
+ :filename => '__tokyocabinet_hash_filename',
46
+ :namespace => '__tokyocabinet_hash_namspace',
47
+ :type => '__tokyocabinet_hash_type',
48
+ :case_insensitive => '__tokyocabinet_hash_case_insensitive'
15
49
  }
16
- CONNECTIONS = {}
17
50
 
18
51
  FIELD_INFO_ENTRIES.each do |entry, key|
19
52
  class_eval do
20
- define_method entry.to_s, proc{self[key]}
21
- define_method entry.to_s + "=", proc{|value| write unless write?; self[key] = value}
53
+ define_method entry.to_s, proc{v = self.original_get_brackets(key); v.nil? ? nil : Marshal.load(v)}
54
+ define_method entry.to_s + "=", proc{|value| write unless write?; self.original_set_brackets key, Marshal.dump(value)}
55
+ end
56
+ end
57
+
58
+ def serializer
59
+ @serializer
60
+ end
61
+
62
+ def serializer=(value)
63
+ self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer],value) unless value.nil?
64
+ end
65
+
66
+ alias original_open open
67
+ def open(write = false)
68
+ flags = write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER
69
+ if !self.original_open(@path_to_db, flags)
70
+ ecode = self.ecode
71
+ raise OpenError, "Open error: #{self.errmsg(ecode)}. Trying to open file #{@path_to_db}"
72
+ end
73
+
74
+ @write = write
75
+
76
+ if write
77
+ self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], @serializer.to_s) unless @serializer.nil?
78
+ else
79
+ serializer_str = self.original_get_brackets(FIELD_INFO_ENTRIES[:serializer])
80
+
81
+ if serializer_str.nil? or serializer_str.empty?
82
+ @serializer = Marshal
83
+ else
84
+ mod = Misc.string2const serializer_str
85
+ @serializer = mod
86
+ end
87
+ end
88
+ end
89
+
90
+ def write?
91
+ @write
92
+ end
93
+
94
+ def write
95
+ self.close
96
+ self.open(true)
97
+ end
98
+
99
+ def read
100
+ self.close
101
+ self.open(false)
102
+ end
103
+
104
+ def initialize(path, write = false, serializer = Marshal)
105
+ super()
106
+
107
+ serializer = ALIAS[serializer] if ALIAS.include? serializer
108
+
109
+ @path_to_db = path
110
+ @serializer = serializer
111
+
112
+ if write || ! File.exists?(@path_to_db)
113
+ self.open(true)
114
+ else
115
+ self.open(false)
22
116
  end
23
117
  end
24
118
 
119
+ def self.get(path, write = false, serializer = Marshal)
120
+ serializer = ALIAS[serializer] if ALIAS.include? serializer
121
+ @serializer = serializer
122
+ d = CONNECTIONS[path] ||= self.new(path, false, @serializer)
123
+ write ? d.write : d.read
124
+ d
125
+ end
126
+
127
+ #{{{ ACESSORS
128
+
25
129
  alias original_get_brackets []
26
130
  def [](key)
27
131
  return nil unless String === key
28
132
  result = self.original_get_brackets(key)
29
- result ? Serializer.load(result) : nil
133
+ result ? @serializer.load(result) : nil
30
134
  end
31
135
 
32
136
  alias original_set_brackets []=
33
137
  def []=(key,value)
34
138
  raise KeyFormatError, "Key must be a String, its #{key.class.to_s}" unless String === key
35
- write unless write?
36
- self.original_set_brackets(key, Serializer.dump(value))
139
+ raise "Closed TCHash connection" unless write?
140
+ self.original_set_brackets(key, serializer.dump(value))
37
141
  end
38
142
 
39
143
  def values_at(*args)
@@ -57,22 +161,21 @@ class TCHash < TokyoCabinet::HDB
57
161
  indexes = FIELD_INFO_ENTRIES.values.collect do |field| keys.index(field) end.compact.sort.reverse
58
162
  indexes.each do |index| values.delete_at index end
59
163
 
60
- values.collect{|v| Serializer.load(v)}
164
+ values.collect{|v| serializer.load(v)}
61
165
  end
62
166
 
63
167
  # This version of each fixes a problem in ruby 1.9. It also
64
168
  # removes the special entries
65
- def each19(&block)
66
- values = self.original_values.collect{|v| Serializer.load v}
169
+ def each(&block)
170
+ values = self.original_values
67
171
  keys = self.original_keys
68
172
  indexes = FIELD_INFO_ENTRIES.values.collect do |field| keys.index(field) end.compact.sort.reverse
69
173
  indexes.sort.reverse.each do |index| values.delete_at(index); keys.delete_at(index) end
70
174
 
71
- keys.zip(values).each &block
175
+ keys.zip(values.collect{|v| serializer.load v}).each &block
72
176
  end
73
177
 
74
178
  alias original_each each
75
- alias each each19
76
179
 
77
180
  def collect
78
181
  res = []
@@ -81,50 +184,17 @@ class TCHash < TokyoCabinet::HDB
81
184
  end
82
185
 
83
186
  def merge!(data)
84
- new_data = {}
85
- data.each do |key, values|
86
- self[key] = values
87
- end
88
- end
89
-
90
- alias original_open open
91
- def open(write = false)
92
- flags = write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER
93
- if !self.original_open(@path_to_db, flags)
94
- ecode = self.ecode
95
- raise OpenError, "Open error: #{self.errmsg(ecode)}. Trying to open file #{@path_to_db}"
96
- end
97
- @write = write
98
- end
99
-
100
- def write?
101
- @write
102
- end
103
-
104
- def write
105
- self.close
106
- self.open(true)
107
- end
108
-
109
- def read
110
- self.close
111
- self.open(false)
112
- end
113
-
114
- def initialize(path, write = false)
115
- super()
116
- @path_to_db = path
117
-
118
- if write || ! File.exists?(@path_to_db)
119
- self.open(true)
187
+ raise "Closed TCHash connection" unless write?
188
+ serialized =
189
+ data.collect{|key, values| [key.to_s, serializer.dump(values)]}
190
+ if tranbegin
191
+ serialized.each do |key, values|
192
+ self.putasync(key, values)
193
+ end
194
+ trancommit
120
195
  else
121
- self.open(false)
196
+ raise "Transaction cannot initiate"
122
197
  end
123
198
  end
124
199
 
125
- def self.get(path, write = false)
126
- d = CONNECTIONS[path] ||= self.new(path, false)
127
- write ? d.write : d.read
128
- d
129
- end
130
200
  end
data/lib/rbbt/util/tsv.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'rbbt/util/misc'
2
2
  require 'rbbt/util/open'
3
+ require 'rbbt/util/path'
3
4
  require 'rbbt/util/tc_hash'
4
5
  require 'rbbt/util/tmpfile'
5
6
  require 'rbbt/util/log'
@@ -7,1336 +8,160 @@ require 'rbbt/util/persistence'
7
8
  require 'digest'
8
9
  require 'fileutils'
9
10
 
11
+ require 'rbbt/util/tsv/parse'
12
+ require 'rbbt/util/tsv/accessor'
13
+ require 'rbbt/util/tsv/manipulate'
14
+ require 'rbbt/util/tsv/index'
15
+ require 'rbbt/util/tsv/attach'
10
16
  class TSV
11
- class FieldNotFoundError < StandardError;end
12
-
13
- module Field
14
- def ==(string)
15
- return false unless String === string
16
- self.sub(/#.*/,'').casecmp(string.sub(/#.*/,'')) == 0
17
- end
18
- end
19
-
20
- #{{{ Persistence
21
-
22
- CACHEDIR="/tmp/tsv_persistent_cache"
23
- FileUtils.mkdir CACHEDIR unless File.exist? CACHEDIR
24
-
25
- def self.cachedir=(cachedir)
26
- CACHEDIR.replace cachedir
27
- FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
28
- end
29
-
30
- def self.cachedir
31
- CACHEDIR
32
- end
33
-
34
-
35
- #{{{ Headers and Field Stuff
36
17
 
37
18
  def self.headers(file, options = {})
38
- if file =~ /(.*)#(.*)/ and File.exists? $1
39
- options.merge! Misc.string2hash $2
19
+
20
+ ## Remove options from filename
21
+ if String === file and file =~/(.*?)#(.*)/ and File.exists? $1
22
+ options = Misc.add_defaults options, Misc.string2hash($2)
40
23
  file = $1
41
24
  end
42
25
 
43
- options = Misc.add_defaults options, :sep => "\t", :header_hash => "#"
44
- io = Open.open(file)
45
- line = io.gets
46
- io.close
26
+ fields = case
27
+ when Open.can_open?(file)
28
+ Open.open(file, :grep => options[:grep]) do |f| TSV.parse_header(f, options[:sep], options[:header_hash]).values_at(0, 1).flatten end
29
+ when File === file
30
+ file = Open.grep(file, options[:grep]) if options[:grep]
31
+ TSV.parse_header(file, options[:sep], options[:header_hash]).values_at(0, 1).flatten
32
+ else
33
+ raise "File #{file.inspect} not found"
34
+ end
47
35
 
48
- if line =~ /^#{options[:header_hash]}/
49
- line.chomp.sub(/^#{options[:header_hash]}/,'').split(options[:sep])
50
- else
36
+ if fields.compact.empty?
51
37
  nil
52
- end
53
- end
54
-
55
- def self.fields_include(key_field, fields, field)
56
- return true if key_field == field or fields.include? field
57
- return false
58
- end
59
-
60
- def self.field_positions(key_field, fields, *selected)
61
- selected.collect do |sel|
62
- case
63
- when (sel.nil? or sel == :main or sel == key_field)
64
- -1
65
- when Integer === sel
66
- sel
67
- else
68
- Misc.field_position fields, sel
69
- end
70
- end
71
- end
72
-
73
- def fields_include(field)
74
- return TSV.fields_include key_field, fields, field
75
- end
76
-
77
- def field_positions(*selected)
78
- return nil if selected.nil? or selected == [nil]
79
- TSV.field_positions(key_field, fields, *selected)
80
- end
81
-
82
- def fields_at(*positions)
83
- return nil if fields.nil?
84
- return nil if positions.nil? or positions == [nil]
85
- (fields + [key_field]).values_at(*positions)
86
- end
87
-
88
- #{{{ Iteration, Merging, etc
89
- def through(new_key_field = nil, new_fields = nil, &block)
90
- new_key_position = (field_positions(new_key_field) || [-1]).first
91
- new_fields = [new_fields] if String === new_fields
92
-
93
- if new_key_position == -1
94
-
95
- if new_fields.nil? or new_fields == fields
96
- each &block
97
- return [key_field, fields]
98
- else
99
- new_field_positions = field_positions(*new_fields)
100
- each do |key, values|
101
- if values.nil?
102
- yield key, nil
103
- else
104
- yield key, values.values_at(*new_field_positions)
105
- end
106
- end
107
- return [key_field, fields_at(*new_field_positions)]
108
- end
109
-
110
- else
111
- new_field_positions = field_positions(*new_fields)
112
-
113
- new_field_names = fields_at(*new_field_positions)
114
- if new_field_names.nil? and fields
115
- new_field_names = fields.dup
116
- new_field_names.delete_at new_key_position
117
- new_field_names.unshift key_field
118
- end
119
-
120
- each do |key, values|
121
- if type == :double
122
- tmp_values = values + [[key]]
123
- else
124
- tmp_values = values + [key]
125
- end
126
-
127
- if new_field_positions.nil?
128
- new_values = values.dup
129
- new_values.delete_at new_key_position
130
- new_values.unshift [key]
131
- else
132
- new_values = tmp_values.values_at(*new_field_positions)
133
- end
134
-
135
- if not Array === tmp_values[new_key_position]
136
- yield tmp_values[new_key_position], NamedArray.name(new_values, new_field_names)
137
- else
138
- tmp_values[new_key_position].each do |new_key|
139
- if new_field_names
140
- yield new_key, NamedArray.name(new_values, new_field_names)
141
- else
142
- yield new_key, new_values
143
- end
144
- end
145
- end
146
- end
147
- return [(fields_at(new_key_position) || [nil]).first, new_field_names]
148
- end
149
- end
150
-
151
- def process(field)
152
- through do |key, values|
153
- values[field].replace yield(values[field], key, values) unless values[field].nil?
154
- end
155
- end
156
-
157
-
158
- def reorder(new_key_field, new_fields = nil, options = {})
159
- options = Misc.add_defaults options
160
- return TSV.new(Persistence::TSV.get(options[:persistence_file], false), :case_insensitive => case_insensitive) if options[:persistence_file] and File.exists?(options[:persistence_file])
161
-
162
- new = {}
163
- new_key_field, new_fields = through new_key_field, new_fields do |key, values|
164
- if new[key].nil?
165
- new[key] = values
166
- else
167
- new[key] = new[key].zip(values)
168
- end
169
- end
170
-
171
- new.each do |key,values|
172
- values.each{|list| list.flatten! if Array === list}
173
- end
174
-
175
- if options[:persistence_file]
176
- reordered = TSV.new(Persistence::TSV.get(options[:persistence_file], false), :case_insensitive => case_insensitive)
177
- reordered.merge! new
178
38
  else
179
- reordered = TSV.new(new, :case_insensitive => case_insensitive)
39
+ fields
180
40
  end
181
-
182
- reordered.key_field = new_key_field
183
- reordered.fields = new_fields
184
-
185
- reordered
186
41
  end
187
42
 
188
- def slice(new_fields, options = {})
189
- reorder(:main, new_fields)
43
+ def self.encapsulate_persistence(file, options)
190
44
  end
191
45
 
192
- def add_field(name = nil)
193
- each do |key, values|
194
- self[key] = values + [yield(key, values)]
195
- end
196
-
197
- if fields != nil
198
- new_fields = fields + [name]
199
- self.fields = new_fields
200
- end
201
- end
202
-
203
- def select(method)
204
- new = TSV.new({})
205
- new.key_field = key_field
206
- new.fields = fields.dup
207
- new.type = type
208
- new.filename = filename + "#Select: #{method.inspect}"
209
- new.case_insensitive = case_insensitive
46
+ def initialize(file = {}, type = nil, options = {})
47
+ # Process Options
210
48
 
211
- case
212
- when Array === method
213
- through do |key, values|
214
- new[key] = values if ([key,values].flatten & method).any?
215
- end
216
- when Regexp === method
217
- through do |key, values|
218
- new[key] = values if [key,values].flatten.select{|v| v =~ method}.any?
219
- end
220
- when String === method
221
- through do |key, values|
222
- new[key] = values if [key,values].flatten.select{|v| v == method}.any?
223
- end
224
- when Hash === method
225
- key = method.keys.first
226
- method = method.values.first
227
- case
228
- when (Array === method and (:main == key or key_field == key))
229
- method.each{|item| if values = self[item]; then new[item] = values; end}
230
- when Array === method
231
- through :main, key do |key, values|
232
- new[key] = self[key] if (values.flatten & method).any?
233
- end
234
- when Regexp === method
235
- through :main, key do |key, values|
236
- new[key] = self[key] if values.flatten.select{|v| v =~ method}.any?
237
- end
238
- when String === method
239
- through :main, key do |key, values|
240
- new[key] = self[key] if values.flatten.select{|v| v == method}.any?
241
- end
242
- end
243
- end
244
-
245
-
246
- new
247
- end
248
-
249
- def index(options = {})
250
- options = Misc.add_defaults options, :order => false, :persistence => false
251
-
252
- new, extra = Persistence.persist(filename, :Index, :tsv, options) do |filename, options|
253
- new = {}
254
- if options[:order]
255
- new_key_field, new_fields = through options[:target], options[:others] do |key, values|
256
-
257
- values.each_with_index do |list, i|
258
- next if list.nil? or list.empty?
259
-
260
- list = [list] unless Array === list
261
-
262
- list.each do |value|
263
- next if value.nil? or value.empty?
264
- value = value.downcase if options[:case_insensitive]
265
- new[value] ||= []
266
- new[value][i + 1] ||= []
267
- new[value][i + 1] << key
268
- end
269
- new[key] ||= []
270
- new[key][0] = key
271
- end
272
-
273
- end
274
-
275
- new.each do |key, values|
276
- values.flatten!
277
- values.compact!
278
- end
279
-
280
- else
281
- new_key_field, new_fields = through options[:target], options[:others] do |key, values|
282
- new[key] ||= []
283
- new[key] << key
284
- values.each do |list|
285
- next if list.nil?
286
- if Array === list
287
- list.each do |value|
288
- value = value.downcase if options[:case_insensitive]
289
- new[value] ||= []
290
- new[value] << key
291
- end
292
- else
293
- next if list.empty?
294
- value = list
295
- value = value.downcase if options[:case_insensitive]
296
- new[value] ||= []
297
- new[value] << key
298
- end
299
- end
300
- end
301
- end
302
-
303
- [new, {:key_field => new_key_field, :fields => new_fields, :type => :double, :case_insensitive => options[:case_insensitive]}]
304
- end
305
-
306
- new = TSV.new(new)
307
- new.filename = "Index: " + filename + options.inspect
308
- new.fields = extra[:fields]
309
- new.key_field = extra[:key_field]
310
- new.case_insensitive = extra[:case_insensitive]
311
- new.type = extra[:type]
312
- new
313
- end
314
-
315
- def smart_merge(other, match = nil, new_fields = nil)
316
-
317
- new_fields = [new_fields] if String === new_fields
318
- if self.fields and other.fields
319
- common_fields = ([self.key_field] + self.fields) & ([other.key_field] + other.fields)
320
- new_fields ||= ([other.key_field] + other.fields) - ([self.key_field] + self.fields)
321
-
322
- common_fields.delete match if String === match
323
- common_fields.delete_at match if Integer === match
324
-
325
- this_common_field_positions = self.field_positions *common_fields
326
- other_common_field_positions = other.field_positions *common_fields
327
- other_new_field_positions = other.field_positions *new_fields
328
- else
329
- nofieldinfo = true
330
- end
331
-
332
- case
333
- when TSV === match
334
- match_index = match
335
- matching_code_position = nil
336
-
337
- when Array === match
338
- match_index = match.first
339
- matching_code_position = field_positions(match.last).first
340
-
341
- when match =~ /^through:(.*)/
342
- through = $1
343
- if through =~ /(.*)#using:(.*)/
344
- through = $1
345
- matching_code_position = field_positions($2).first
346
- else
347
- matching_code_position = nil
348
- end
349
- index_fields = TSV.headers(through)
350
- target_field = index_fields.select{|field| other.fields_include field}.first
351
- Log.debug "Target Field: #{ target_field }"
352
- match_index = TSV.open_file(through).index(:field => target_field)
353
-
354
- when field_positions(match).first
355
- matching_code_position = field_positions(match).first
356
- match_index = nil
357
- end
358
-
359
- if matching_code_position.nil? and match_index.fields
360
- match_index.fields.each do |field|
361
- if matching_code_position = field_positions(field).first
362
- break
363
- end
364
- end
365
- end
366
-
367
- if match_index and match_index.key_field == other.key_field
368
- other_index = nil
369
- else
370
- other_index = (match === String and other.fields_include(match)) ?
371
- other.index(:other => match, :order => true) : other.index(:order => true)
372
- end
373
-
374
- each do |key,values|
375
- Log.debug "Key: #{ key }. Values: #{values * ", "}"
376
- if matching_code_position.nil? or matching_code_position == -1
377
- matching_codes = [key]
378
- else
379
- matching_codes = values[matching_code_position]
380
- matching_codes = [matching_codes] unless matching_codes.nil? or Array === matching_codes
381
- end
382
- Log.debug "Matching codes: #{matching_codes}"
383
-
384
- next if matching_codes.nil?
385
-
386
- matching_codes.each do |matching_code|
387
- if match_index
388
- if match_index[matching_code]
389
- matching_code_fix = match_index[matching_code].first
390
- else
391
- matching_code_fix = nil
392
- end
393
- else
394
- matching_code_fix = matching_code
395
- end
396
-
397
- Log.debug "Matching code (fix): #{matching_code_fix}"
398
- next if matching_code_fix.nil?
399
-
400
- if other_index
401
- Log.debug "Using other_index"
402
- other_codes = other_index[matching_code_fix]
403
- else
404
- other_codes = matching_code_fix
405
- end
406
- Log.debug "Other codes: #{other_codes}"
407
-
408
- next if other_codes.nil? or other_codes.empty?
409
- other_code = other_codes.first
410
-
411
- if nofieldinfo
412
- next if other[other_code].nil?
413
- if type == :double
414
- other_values = [[other_code]] + other[other_code]
415
- else
416
- other_values = [other_code] + other[other_code]
417
- end
418
- other_values.delete_if do |list|
419
- list = [list] unless Array === list
420
- list.collect{|e| case_insensitive ? e.downcase : e }.
421
- select{|e| case_insensitive ? e == matching_code.downcase : e == matching_code }.any?
422
- end
423
-
424
- new_values = values + other_values
425
- else
426
- if other[other_code].nil?
427
- if type == :double
428
- other_values = [[]] * other.fields.length
429
- else
430
- other_values = [] * other.fields.length
431
- end
432
- else
433
- if type == :double
434
- other_values = other[other_code] + [[other_code]]
435
- else
436
- other_values = other[other_code] + [other_code]
437
- end
438
- end
439
-
440
-
441
- new_values = values.dup
442
-
443
- if type == :double
444
- this_common_field_positions.zip(other_common_field_positions).each do |tpos, opos|
445
- new_values_tops = new_values[tpos]
446
-
447
- if other.type == :double
448
- new_values_tops += other_values[opos]
449
- else
450
- new_values_tops += [other_values[opos]]
451
- end
452
-
453
- new_values[tpos] = new_values_tops.uniq
454
- end
455
- end
456
-
457
- new_values.concat other_values.values_at *other_new_field_positions
458
- end
459
-
460
- self[key] = new_values
461
- end
462
- end
463
-
464
- self.fields = self.fields + new_fields unless nofieldinfo
465
- end
466
-
467
-
468
- def self.field_matches(tsv, values)
469
- if values.flatten.sort[0..9].compact.collect{|n| n.to_i} == (1..10).to_a
470
- return {}
471
- end
472
-
473
- key_field = tsv.key_field
474
- fields = tsv.fields
475
-
476
- field_values = {}
477
- fields.each{|field|
478
- field_values[field] = []
479
- }
480
-
481
- tsv.through do |key,entry_values|
482
- fields.zip(entry_values).each do |field,entry_field_values|
483
- field_values[field].concat entry_field_values
484
- end
485
- end
486
-
487
- field_values.each do |field,field_value_list|
488
- field_value_list.replace(values & field_value_list.flatten.uniq)
489
- end
490
-
491
- field_values[key_field] = values & tsv.keys
492
-
493
- field_values
494
- end
495
-
496
- def field_matches(values)
497
- TSV.field_matches(self, values)
498
- end
499
-
500
-
501
-
502
- #{{{ Helpers
503
-
504
- def self.index(file, options = {})
505
- options = Misc.add_defaults options, :data_persistence => true, :persistence => true
506
- persistence, persistence_file = Misc.process_options options, :persistence, :persistence_file
507
- options[:persistence], options[:persistence_file] = options.values_at :data_persistence, :data_persistence_file
508
- options.delete :data_persistence
509
- options.delete :data_persistence_file
510
-
511
- index, extra = Persistence.persist(file, :Index, :tsv, options) do |file, options, filename|
512
- TSV.new(file, :double, options).index
513
- end
514
- index
515
- end
516
-
517
- def self.index2(file, options = {})
518
- opt_data = options.dup
519
- opt_index = options.dup
520
- opt_data.delete :field
521
- opt_data.delete :persistence
522
- opt_index.delete :persistence
523
-
524
- opt_data[:persistence] = true if options[:data_persistence]
525
-
526
- opt_index.merge! :persistence_file => get_persistence_file(file, "index:#{ file }_#{options[:field]}:", opt_index) if options[:persistence]
527
-
528
- if ! opt_index[:persistence_file].nil? && File.exists?(opt_index[:persistence_file])
529
- Log.low "Reloading persistent index for #{ file }: #{opt_index[:persistence_file]}"
530
- TSV.new(Persistence::TSV.get(opt_index[:persistence_file], false), opt_index)
531
- else
532
- Log.low "Creating index for #{ file }: #{opt_index[:persistence_file]}"
533
- data = TSV.new(file, opt_data)
534
- data.index(opt_index)
535
- end
536
- end
537
-
538
- def self.open_file(file)
539
- if file =~ /(.*?)#(.*)/
540
- file, options = $1, Misc.string2hash($2.to_s)
541
- else
542
- options = {}
543
- end
544
-
545
- TSV.new(file, options)
546
- end
547
-
548
- #{{{ Accesor Methods
549
- attr_accessor :filename, :type, :case_insensitive, :key_field, :fields, :data
550
-
551
- def fields
552
- return nil if @fields.nil?
553
- fields = @fields
554
- fields.each do |f| f.extend Field end if Array === fields
555
- fields
556
- end
557
-
558
- def fields=(new_fields)
559
- @fields = new_fields
560
- if Persistence::TSV === @data
561
- @data.fields = new_fields
562
- end
563
- end
564
-
565
-
566
-
567
- def keys
568
- @data.keys
569
- end
570
-
571
- def values
572
- @data.values
573
- end
574
-
575
- def size
576
- @data.size
577
- end
578
-
579
- # Write
580
-
581
- def []=(key, value)
582
- key = key.downcase if @case_insensitive
583
- @data[key] = value
584
- end
585
-
586
-
587
- def merge!(new_data)
588
- new_data.each do |key, value|
589
- self[key] = value
590
- end
591
- end
592
-
593
- # Read
594
-
595
- def follow(value)
596
- return nil if value.nil?
597
- if String === value && value =~ /__Ref:(.*)/
598
- return self[$1]
599
- else
600
- value = NamedArray.name value, fields if Array === value and fields
601
- value
602
- end
603
- end
604
-
605
- def [](key)
606
- if Array === key
607
- return @data[key] if @data[key] != nil
608
- key.each{|k| v = self[k]; return v unless v.nil?}
609
- return nil
610
- end
611
-
612
- key = key.downcase if @case_insensitive and key !~ /^__Ref:/
613
- follow @data[key]
614
- end
615
-
616
- def values_at(*keys)
617
- keys.collect{|k|
618
- self[k]
619
- }
620
- end
621
-
622
- def each(&block)
623
- @data.each do |key, value|
624
- block.call(key, follow(value))
625
- end
626
- end
627
-
628
- def collect
629
- if block_given?
630
- @data.collect do |key, value|
631
- value = follow(value)
632
- key, values = yield key, value
633
- end
634
- else
635
- @data.collect do |key, value|
636
- [key, follow(value)]
637
- end
638
- end
639
- end
640
-
641
- def sort(&block)
642
- collect.sort(&block).collect{|p|
643
- key, value = p
644
- value = NamedArray.name value, fields if fields
645
- [key, value]
646
- }
647
- end
648
-
649
- def sort_by(&block)
650
- collect.sort_by &block
651
- end
652
-
653
- def values_to_s(values)
654
- case
655
- when (values.nil? and fields.nil?)
656
- "\n"
657
- when (values.nil? and not fields.nil?)
658
- "\t" << ([""] * fields.length) * "\t" << "\n"
659
- when (not Array === values)
660
- "\t" << values.to_s << "\n"
661
- when Array === values.first
662
- "\t" << values.collect{|list| (list || []) * "|"} * "\t" << "\n"
663
- else
664
- "\t" << values * "\t" << "\n"
665
- end
666
- end
667
-
668
- def to_s(keys = nil)
669
- str = ""
670
-
671
- if fields
672
- str << "#" << key_field << "\t" << fields * "\t" << "\n"
673
- end
674
-
675
- if keys.nil?
676
- each do |key, values|
677
- key = key.to_s if Symbol === key
678
- str << key.dup << values_to_s(values)
679
- end
680
- else
681
- keys.zip(values_at(*keys)).each do |key, values|
682
- key = key.to_s if Symbol === key
683
- str << key.dup << values_to_s(values)
684
- end
685
- end
686
-
687
- str
688
- end
689
-
690
- #{{{ Parsing
691
-
692
- def self.parse_fields(io, delimiter = "\t")
693
- return [] if io.nil?
694
- fields = io.split(delimiter, -1)
695
- fields
696
- end
697
-
698
- def self.zip_fields(list, fields = nil)
699
- return [] if list.nil? || list.empty?
700
- fields ||= list.fields if list.respond_to? :fields
701
- zipped = list[0].zip(*list[1..-1])
702
- zipped = zipped.collect{|v| NamedArray.name(v, fields)} if fields
703
- zipped
704
- end
705
-
706
- def self.key_order(file, options = {})
707
- # Prepare options
708
- options = add_defaults options,
709
- :sep => "\t",
710
- :sep2 => "|",
711
- :native => 0,
712
- :fix => nil,
713
- :exclude => nil,
714
- :select => nil,
715
- :grep => nil,
716
- :case_insensitive => false,
717
- :header_hash => '#'
718
-
719
- options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
720
-
721
- if String === file and File.exists? file
722
- file = File.open(file)
723
- end
724
-
725
- #{{{ Process first line
726
-
727
- line = file.gets
728
- raise "Empty content" if line.nil?
729
- line.chomp!
730
-
731
- if line =~ /^#{options[:header_hash]}/
732
- header_fields = parse_fields(line, options[:sep])
733
- header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
734
- line = file.gets
735
- else
736
- header_fields = nil
737
- end
738
-
739
- id_pos = Misc.field_position(header_fields, options[:native])
740
-
741
- if options[:extra].nil?
742
- extra_pos = nil
743
- max_cols = 0
744
- else
745
- extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
746
- end
747
-
748
- ids = []
749
- #{{{ Process rest
750
- while line do
751
- line.chomp!
752
-
753
- line = options[:fix].call line if options[:fix]
754
- break if not line
755
-
756
- # Select and fix lines
757
- if line.empty? or
758
- (options[:exclude] and options[:exclude].call(line)) or
759
- (options[:select] and not options[:select].call(line))
760
-
761
- line = file.gets
762
- next
763
- end
764
-
765
- ### Process line
766
-
767
- # Chunk fields
768
- parts = parse_fields(line, options[:sep])
769
-
770
- # Get next line
771
- line = file.gets
772
-
773
- # Get id field
774
- next if parts[id_pos].nil? || parts[id_pos].empty?
775
- ids << parts[id_pos]
776
- end
777
-
778
- ids
779
- end
780
-
781
- def self.parse_header(stream, sep, header_hash)
782
- fields, key_field = nil
783
- options = {}
784
-
785
- line = stream.gets
786
-
787
- if line and line =~ /^#{header_hash}: (.*)/
788
- options = Misc.string2hash $1
789
- line = stream.gets
790
- end
791
-
792
- sep = options[:sep] if options[:sep]
793
-
794
- if line and line =~ /^#{header_hash}/
795
- line.chomp!
796
- fields = parse_fields(line, sep)
797
- key_field = fields.shift
798
- key_field = key_field[(0 + header_hash.length)..-1] # Remove initial hash character
799
- line = stream.gets
800
- end
801
-
802
- raise "Empty content" if line.nil?
803
- return key_field, fields, options, line
804
- end
805
-
806
- def self.parse(stream, options = {})
807
- # Prepare options
808
- options = Misc.add_defaults options,
809
- :case_insensitive => false,
810
- :type => :double,
811
-
812
- :merge => false,
813
- :keep_empty => true,
814
- :cast => nil,
815
-
816
- :sep => "\t",
817
- :sep2 => "|",
818
- :header_hash => '#',
819
-
820
- :key => 0,
821
- :fields => nil,
822
-
823
- :fix => nil,
824
- :exclude => nil,
825
- :select => nil,
826
- :grep => nil
827
-
828
-
829
- sep, header_hash =
830
- Misc.process_options options, :sep, :header_hash
831
-
832
- key_field, other_fields, more_options, line = TSV.parse_header(stream, sep, header_hash)
833
-
834
- sep = more_options[:sep] if more_options[:sep]
835
- options = Misc.add_defaults options, more_options
836
- sep2 = Misc.process_options options, :sep2
837
-
838
- key, others =
839
- Misc.process_options options, :key, :others
840
-
841
- if key_field.nil?
842
- key_pos = key
843
- key_field, fields = nil
844
- else
845
- all_fields = [key_field].concat other_fields
846
-
847
- key_pos = Misc.field_position(all_fields, key)
848
-
849
- if String === others or Symbol === others
850
- others = [others]
851
- end
852
-
853
- if others.nil?
854
- other_pos = (0..(all_fields.length - 1)).to_a
855
- other_pos.delete key_pos
856
- else
857
- other_pos = Misc.field_position(all_fields, *others)
858
- end
859
-
860
- key_field = all_fields[key_pos]
861
- fields = all_fields.values_at *other_pos
862
- end
863
-
864
- case_insensitive, type, merge, keep_empty, cast =
865
- Misc.process_options options, :case_insensitive, :type, :merge, :keep_empty, :cast
866
- fix, exclude, select, grep =
867
- Misc.process_options options, :fix, :exclude, :select, :grep
868
-
869
- #{{{ Process rest
870
- data = {}
871
- single = type.to_sym != :double
872
- max_cols = 0
873
- while line do
874
- line.chomp!
875
-
876
- line = fix.call line if fix
877
- break if not line
878
-
879
- if header_hash and line =~ /^#{header_hash}/
880
- line = stream.gets
881
- next
882
- end
883
-
884
- if line.empty? or
885
- (exclude and exclude.call(line)) or
886
- (select and not select.call(line))
887
-
888
- line = stream.gets
889
- next
890
- end
891
-
892
- # Chunk fields
893
- parts = parse_fields(line, sep)
894
-
895
- # Get next line
896
- line = stream.gets
897
-
898
- # Get id field
899
- next if parts[key_pos].nil? || parts[key_pos].empty?
900
-
901
- if single
902
- ids = parse_fields(parts[key_pos], sep2)
903
- ids.collect!{|id| id.downcase} if case_insensitive
904
-
905
- id = ids.shift
906
- ids.each do |id2| data[id2] = "__Ref:#{id}" end
907
-
908
- if key_field.nil?
909
- other_pos = (0..(parts.length - 1)).to_a
910
- other_pos.delete key_pos
911
- end
912
-
913
- extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2).first}
914
- extra.collect! do |elem|
915
- case
916
- when String === cast
917
- elem.send(cast)
918
- when Proc === cast
919
- cast.call elem
920
- end
921
- end if cast
922
-
923
- max_cols = extra.size if extra.size > (max_cols || 0)
924
- case type
925
- when :list
926
- data[id] = extra unless data.include? id
927
- when :flat
928
- data[id] = extra.flatten unless data.include? id
929
- when :single
930
- data[id] = extra.flatten.first unless data.include? id
931
- end
932
-
933
- else
934
- ids = parse_fields(parts[key_pos], sep2)
935
- ids.collect!{|id| id.downcase} if case_insensitive
936
-
937
- id = ids.shift
938
- ids.each do |id2| data[id2] = "__Ref:#{id}" end
939
-
940
- if key_field.nil?
941
- other_pos = (0..(parts.length - 1)).to_a
942
- other_pos.delete key_pos
943
- end
944
-
945
- extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}
946
- extra.collect! do |list|
947
- case
948
- when String === cast
949
- list.collect{|elem| elem.send(cast)}
950
- when Proc === cast
951
- list.collect{|elem| cast.call elem}
952
- end
953
- end if cast
954
-
955
- max_cols = extra.size if extra.size > (max_cols || 0)
956
- if merge
957
- data[id] = extra unless data.include? id
958
- else
959
- if not data.include? id
960
- data[id] = extra
961
- else
962
- entry = data[id]
963
- while entry =~ /__Ref:(.*)/ do entry = data[$1] end
964
- extra.each_with_index do |f, i|
965
- if f.empty?
966
- next unless keep_empty
967
- f= [""]
968
- end
969
- entry[i] ||= []
970
- entry[i] = entry[i].concat f
971
- end
972
- data[id] = entry
973
- end
974
- end
975
- end
976
- end
977
-
978
- if keep_empty and max_cols > 0
979
- data.each do |key, values|
980
- next if values =~ /__Ref:/
981
- new_values = values
982
- max_cols.times do |i|
983
- if type == :double
984
- new_values[i] = [""] if new_values[i].nil? or new_values[i].empty?
985
- else
986
- new_values[i] = "" if new_values[i].nil?
987
- end
988
- end
989
- data[key] = new_values
990
- end
991
- end
992
-
993
- [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive}]
994
- end
995
-
996
- def self.parse2(data, file, options = {})
997
-
998
- # Prepare options
999
- options = Misc.add_defaults options,
1000
- :sep => "\t",
1001
- :sep2 => "|",
1002
- :native => 0,
1003
- :extra => nil,
1004
- :fix => nil,
1005
- :exclude => nil,
1006
- :select => nil,
1007
- :grep => nil,
1008
- :single => false,
1009
- :unique => false,
1010
- :merge => false,
1011
- :flatten => false,
1012
- :keep_empty => true,
1013
- :case_insensitive => false,
1014
- :header_hash => '#' ,
1015
- :cast => nil,
1016
- :persistence_file => nil
1017
-
1018
-
1019
- options[:unique] = options[:uniq] if options[:unique].nil?
1020
- options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
1021
- options[:flatten] = true if options[:single]
1022
-
1023
- #{{{ Process first line
1024
-
1025
- line = file.gets
1026
- raise "Empty content" if line.nil?
1027
- line.chomp!
1028
-
1029
- if line =~ /^#{options[:header_hash]}/
1030
- header_fields = parse_fields(line, options[:sep])
1031
- header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
1032
- line = file.gets
1033
- else
1034
- header_fields = nil
1035
- end
1036
-
1037
- id_pos = Misc.field_position(header_fields, options[:native])
1038
-
1039
- if options[:extra].nil?
1040
- extra_pos = nil
1041
- max_cols = 0
1042
- else
1043
- extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
1044
- end
1045
-
1046
- #{{{ Process rest
1047
- while line do
1048
- line.chomp!
1049
-
1050
- line = options[:fix].call line if options[:fix]
1051
- break if not line
1052
-
1053
- if options[:header_hash] && line =~ /^#{options[:header_hash]}/
1054
- line = file.gets
1055
- next
1056
- end
1057
-
1058
- # Select and fix lines
1059
- if line.empty? or
1060
- (options[:exclude] and options[:exclude].call(line)) or
1061
- (options[:select] and not options[:select].call(line))
1062
-
1063
- line = file.gets
1064
- next
1065
- end
1066
-
1067
- ### Process line
1068
-
1069
- # Chunk fields
1070
- parts = parse_fields(line, options[:sep])
1071
-
1072
- # Get next line
1073
- line = file.gets
1074
-
1075
- # Get id field
1076
- next if parts[id_pos].nil? || parts[id_pos].empty?
1077
- ids = parse_fields(parts[id_pos], options[:sep2])
1078
- ids.collect!{|id| id.downcase } if options[:case_insensitive]
1079
-
1080
- # Get extra fields
1081
-
1082
- if options[:extra].nil? and not (options[:flatten] or options[:single])
1083
- extra = parts
1084
- extra.delete_at(id_pos)
1085
- max_cols = extra.size if extra.size > (max_cols || 0)
1086
- else
1087
- if extra_pos.nil?
1088
- extra = parts
1089
- extra.delete_at id_pos
1090
- else
1091
- extra = parts.values_at(*extra_pos)
1092
- end
1093
- end
1094
-
1095
- extra.collect!{|value| parse_fields(value, options[:sep2])}
1096
- extra.collect!{|values| values.first} if options[:unique]
1097
- extra.flatten! if options[:flatten]
1098
- extra = extra.first if options[:single]
1099
-
1100
- if options[:cast]
1101
- if Array === extra[0]
1102
- e = extra
1103
- else
1104
- e = [extra]
1105
- end
1106
-
1107
- e.each do |list|
1108
- case
1109
- when String === options[:cast]
1110
- list.collect!{|elem| elem.send(options[:cast])}
1111
- when Proc === options[:cast]
1112
- list.collect!{|elem| options[:cast].call elem}
1113
- end
1114
- end
1115
- end
1116
-
1117
- main_entry = ids.shift
1118
- ids.each do |id| data[id] = "__Ref:#{main_entry}" end
1119
-
1120
- case
1121
- when (options[:single] or options[:unique] or not options[:merge])
1122
- data[main_entry] = extra unless data.include? main_entry
1123
- when options[:flatten]
1124
- entry = data[main_entry]
1125
-
1126
- if entry.nil?
1127
- data[main_entry] = extra
1128
- else
1129
- while entry =~ /__Ref:(.*)/ do entry = data[$1] end
1130
- if Persistence::TSV === data
1131
- data[main_entry] = entry.concat extra
1132
- else
1133
- data[main_entry].concat extra
1134
- end
1135
- end
1136
- else
1137
- entry = data[main_entry]
1138
- if entry.nil?
1139
- data[main_entry] = extra
1140
- else
1141
- while entry =~ /__Ref:(.*)/ do entry = data[$1] end
1142
- extra.each_with_index do |fields, i|
1143
- if fields.empty?
1144
- next unless options[:keep_empty]
1145
- fields = [""]
1146
- end
1147
- entry[i] ||= []
1148
- entry[i] = entry[i].concat fields
1149
- end
1150
- data[main_entry] = entry
1151
- end
1152
- end
1153
- end
1154
-
1155
- if options[:keep_empty] and not max_cols.nil?
1156
- data.each do |key,values|
1157
- new_values = values
1158
- max_cols.times do |i|
1159
- new_values[i] ||= [""]
1160
- end
1161
- data[key] = new_values
1162
- end
1163
- end
1164
-
1165
- # Save header information
1166
- key_field = nil
1167
- fields = nil
1168
- if header_fields && header_fields.any?
1169
- key_field = header_fields[id_pos]
1170
- if extra_pos.nil?
1171
- fields = header_fields
1172
- fields.delete_at(id_pos)
1173
- else
1174
- fields = header_fields.values_at(*extra_pos)
1175
- end
1176
- end
1177
-
1178
- data.read if Persistence::TSV === data
1179
-
1180
- [key_field, fields]
1181
- end
1182
- def initialize(file = {}, type = :double, options = {})
1183
49
  if Hash === type
1184
50
  options = type
1185
- type = :double
51
+ type = nil
1186
52
  end
1187
53
 
54
+ ## Remove options from filename
1188
55
  if String === file and file =~/(.*?)#(.*)/ and File.exists? $1
1189
56
  options = Misc.add_defaults options, Misc.string2hash($2)
1190
57
  file = $1
1191
58
  end
1192
59
 
1193
- options = Misc.add_defaults options, :persistence => false, :case_insensitive => false, :type => type
60
+ options = Misc.add_defaults options, :persistence => false, :type => type
61
+
62
+ # Extract Filename
63
+
64
+ file, extra = file if Array === file and file.length == 2 and Hash === file.last
1194
65
 
1195
66
  @filename = Misc.process_options options, :filename
1196
67
  @filename ||= case
68
+ when Path === file
69
+ file
1197
70
  when (String === file and File.exists? file)
1198
71
  File.expand_path file
72
+ when String === file
73
+ file
1199
74
  when File === file
1200
75
  File.expand_path file.path
76
+ when TSV === file
77
+ File.expand_path file.filename
78
+ when (Persistence::TSV === file and file.filename)
79
+ File.expand_path file.filename
1201
80
  else
1202
- Digest::MD5.hexdigest(file.inspect)
81
+ file.class.to_s
1203
82
  end
1204
83
 
1205
- if block_given?
1206
- @data, extra = Persistence.persist(@filename, :TSV, :tsv, options) do |filename, options| yield filename, options end
84
+ # Process With Persistence
85
+ # Use filename to identify the persistence
86
+ # Several inputs supported
87
+ # Filename or File: Parsed
88
+ # Hash: Encapsulated, empty info
89
+ # TSV: Duplicate
90
+ case
91
+ when block_given?
92
+ @data, extra = Persistence.persist(@filename, :TSV, :tsv_extra, options.merge(:force_array => true)) do |filename, options| yield filename, options end
93
+ extra.each do |key, values|
94
+ self.send("#{ key }=".to_sym, values) if self.respond_to? "#{ key }=".to_sym
95
+ end if not extra.nil?
96
+
1207
97
  else
1208
- @data, extra = Persistence.persist(@filename, :TSV, :tsv, options) do |filename, options|
1209
- data, extra = nil
1210
- case
1211
- when String === file
1212
- File.open(file) do |f|
1213
- data, extra = TSV.parse(f, options)
98
+ case
99
+ when Hash === file
100
+ @data = file
101
+ when TSV === file
102
+ @data = file.data
103
+ when Persistence::TSV === file
104
+ @data = file
105
+ %w(case_insensitive namespace datadir fields key_field type filename cast).each do |key|
106
+ if @data.respond_to?(key.to_sym) and self.respond_to?("#{key}=".to_sym)
107
+ self.send "#{key}=".to_sym, @data.send(key.to_sym)
1214
108
  end
1215
- when File === file
1216
- data, extra = TSV.parse(file, options)
1217
- when Hash === file
1218
- data = file
1219
- extra = {:case_insensitive => options[:case_insensitive], :type => type}
1220
109
  end
1221
-
1222
- [data, extra]
1223
- end
1224
- end
1225
-
1226
- @type = extra[:type]
1227
- @key_field = extra[:key_field]
1228
- @fields = extra[:fields]
1229
- @case_insensitive = extra[:case_insensitive]
1230
- end
1231
-
1232
- def initialize2(file = {}, options = {})
1233
- options = Misc.add_defaults options
1234
- options[:persistence] = true if options[:persistence_file]
1235
-
1236
- if String === file && file =~ /(.*?)#(.*)/
1237
- file, file_options = $1, $2
1238
- options = Misc.add_defaults file_options, options
1239
- end
1240
-
1241
- @case_insensitive = options[:case_insensitive] == true
1242
- @list = ! (options[:flatten] == true || options[:single] == true || options[:unique] == true)
1243
-
1244
- case
1245
- when TSV === file
1246
- Log.low "Copying TSV"
1247
- @filename = file.filename
1248
-
1249
- if options[:persistence] and not Persistence::TSV === file.data
1250
- persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
1251
- Log.low "Making persistance #{ persistence_file }"
1252
- @data = TCHash.get(persistence_file)
1253
- @data.merge! file
1254
- @data.key_field = file.key_field
1255
- @data.fields = file.fields
1256
- else
1257
- @data = file.data
1258
- end
1259
-
1260
- @key_field = file.key_field
1261
- @fields = file.fields
1262
- @case_insensitive = file.case_insensitive
1263
- @list = file.list
1264
- return self
1265
- when Hash === file
1266
- Log.low "Encapsulating Hash in TSV object"
1267
- @filename = "Hash:" + Digest::MD5.hexdigest(file.inspect)
1268
- if options[:persistence]
1269
- persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
1270
- Log.low "Making persistance #{ persistence_file }"
1271
- @data = TCHash.get(persistence_file)
1272
- @data.merge! file
1273
110
  else
1274
- @data = file
1275
- end
1276
- return self
1277
- when Persistence::TSV === file
1278
- Log.low "Encapsulating Persistence::TSV"
1279
- @filename = "Persistence::TSV:" + Digest::MD5.hexdigest(file.inspect)
1280
- @data = file
1281
- @key_field = file.key_field
1282
- @fields = file.fields
1283
- return self
1284
- when File === file
1285
- @filename = File.expand_path file.path
1286
- when String === file && File.exists?(file)
1287
- @filename = File.expand_path file
1288
- file = Open.open(file)
1289
- when StringIO
1290
- else
1291
- raise "File #{file} not found"
1292
- end
111
+ @data, extra = Persistence.persist(@filename, :TSV, :tsv_extra, options) do |file, options, filename|
112
+ data, extra = nil
1293
113
 
1294
- if options[:persistence]
1295
- options.delete :persistence
1296
- persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
114
+ case
115
+ ## Parse source
116
+ when (String === file and file.respond_to? :open)
117
+ data, extra = TSV.parse(file.open(:grep => options[:grep]) , options)
118
+ extra[:namespace] ||= file.namespace
119
+ extra[:datadir] ||= file.datadir
120
+ when StringIO === file
121
+ data, extra = TSV.parse(file, options)
122
+ when Open.can_open?(file)
123
+ Open.open(file, :grep => options[:grep]) do |f|
124
+ data, extra = TSV.parse(f, options)
125
+ end
126
+ #extra[:namespace] = File.basename(File.dirname(filename))
127
+ #extra.delete :namespace if extra[:namespace].empty? or extra[:namespace] == "."
128
+ when File === file
129
+ file = Open.grep(file, options[:grep]) if options[:grep]
130
+ data, extra = TSV.parse(file, options)
131
+ extra[:namespace] = File.basename(File.dirname(file.filename))
132
+ extra.delete :namespace if extra[:namespace].empty? or extra[:namespace] == "."
133
+ ## Encapsulate Hash or TSV
134
+ when block_given?
135
+ data
136
+ else
137
+ raise "Unknown input in TSV.new #{file.inspect}"
138
+ end
1297
139
 
1298
- if File.exists? persistence_file
1299
- Log.low "Loading Persistence for #{ @filename } in #{persistence_file}"
1300
- @data = Persistence::TSV.get(persistence_file, false)
1301
- @key_field = @data.key_field
1302
- @fields = @data.fields
1303
- else
1304
- @data = Persistence::TSV.get(persistence_file, true)
1305
- file = Open.grep(file, options[:grep]) if options[:grep]
140
+ extra[:filename] = filename
1306
141
 
1307
- Log.low "Persistent Parsing for #{ @filename } in #{persistence_file}"
1308
- @key_field, @fields = TSV.parse(@data, file, options.merge(:persistence_file => persistence_file))
1309
- @data.key_field = @key_field
1310
- @data.fields = @fields
1311
- @data.read
142
+ [data, extra]
143
+ end
1312
144
  end
1313
- else
1314
- Log.low "Non-persistent parsing for #{ @filename }"
1315
- @data = {}
1316
- file = Open.grep(file, options[:grep]) if options[:grep]
1317
- @key_field, @fields = TSV.parse(@data, file, options)
1318
145
  end
1319
146
 
1320
- file.close
1321
- @case_insensitive = options[:case_insensitive] == true
147
+ if not extra.nil?
148
+ %w(case_insensitive namespace datadir fields key_field type filename cast).each do |key|
149
+ if extra.include? key.to_sym
150
+ self.send("#{key}=".to_sym, extra[key.to_sym])
151
+ if @data.respond_to? "#{key}=".to_sym
152
+ @data.send("#{key}=".to_sym, extra[key.to_sym])
153
+ end
154
+ end
155
+ end
156
+ end
1322
157
  end
1323
158
 
1324
- end
1325
-
1326
- #{{{ CacheHelper
1327
- require 'rbbt/util/cachehelper'
1328
- module CacheHelper
1329
- def self.tsv_cache(name, key = [])
1330
- cache_file = CacheHelper.build_filename name, key
159
+ def write
160
+ @data.write if @data.respond_to? :write
161
+ end
1331
162
 
1332
- if File.exists? cache_file
1333
- Log.debug "TSV cache file '#{cache_file}' found"
1334
- hash = TCHash.get(cache_file)
1335
- TSV.new(hash)
1336
- else
1337
- Log.debug "Producing TSV cache file '#{cache_file}'"
1338
- data = yield
1339
- TSV.new(data, :persistence_file => cache_file)
1340
- end
163
+ def read
164
+ @data.read if @data.respond_to? :read
1341
165
  end
166
+
1342
167
  end