rbbt-util 1.2.1 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
@@ -1,58 +1,13 @@
1
1
  require 'rbbt/util/open'
2
2
  require 'rbbt/util/tsv'
3
3
  require 'rbbt/util/log'
4
+ require 'rbbt/util/path'
4
5
  require 'rbbt/util/rake'
5
6
 
6
7
  module PKGData
7
8
  attr_accessor :claims
8
- def self.extended(base)
9
- base.claims = {}
10
- end
11
-
12
- module Path
13
- attr_accessor :base
14
-
15
- def method_missing(name, *args, &block)
16
- new = File.join(self.dup, name.to_s)
17
- new.extend Path
18
- new.base = base
19
- new
20
- end
21
-
22
- def [](name)
23
- new = File.join(self.dup, name.to_s)
24
- new.extend Path
25
- new.base = base
26
- new
27
- end
28
-
29
- def tsv(options = {})
30
- produce
31
- TSV.new self, options
32
- end
33
-
34
- def index(field = nil, other = nil, options = {})
35
- produce
36
- TSV.index self, options.merge(:target => field, :others => other)
37
- end
38
-
39
- def open
40
- produce
41
- Open.open(self)
42
- end
43
-
44
- def read
45
- produce
46
- Open.read(self)
47
- end
48
-
49
- def produce
50
- return if File.exists? self
51
-
52
- Log.debug("Trying to produce '#{ self }'")
53
- file, producer = base.reclaim self
54
- base.produce(self, producer[:get], producer[:subdir], producer[:sharedir])
55
- end
9
+ def self.extended(pkg_module)
10
+ pkg_module.claims = {}
56
11
  end
57
12
 
58
13
  class SharedirNotFoundError < StandardError; end
@@ -84,19 +39,18 @@ module PKGData
84
39
 
85
40
  def files
86
41
  path = datadir.dup.extend Path
87
- path.base = self
42
+ path.pkg_module = self
43
+ path.datadir = datadir
88
44
  path
89
45
  end
90
46
 
91
47
  def in_datadir?(file)
92
- if File.expand_path(file.to_s) =~ /^#{Regexp.quote File.expand_path(datadir)}/
93
- true
94
- else
95
- false
96
- end
48
+ Misc.in_directory? file, datadir
97
49
  end
98
50
 
99
- def claim(file, get = nil, subdir = nil, sharedir = nil)
51
+ # file is the complete path of the file inside the datadir
52
+ # get is the get method. :Rakefile for
53
+ def claim(file, get = nil, subdir = nil, namespace = nil, sharedir = nil)
100
54
  file = case
101
55
  when (file.nil? or file === :all)
102
56
  File.join(datadir, subdir.to_s)
@@ -107,7 +61,7 @@ module PKGData
107
61
  end
108
62
 
109
63
  sharedir ||= PKGData.get_caller_sharedir
110
- claims[file] = {:get => get, :subdir => subdir, :sharedir => sharedir}
64
+ claims[file] = {:get => get, :subdir => subdir, :sharedir => sharedir, :namespace => namespace}
111
65
  produce(file, get, subdir, sharedir) if TSV === get
112
66
  produce(file, get, subdir, sharedir) if String === get and not File.exists?(get) and reclaim(file).nil? and not File.basename(get.to_s) == "Rakefile"
113
67
  end
@@ -139,9 +93,12 @@ module PKGData
139
93
 
140
94
  FileUtils.mkdir_p File.dirname(file) unless File.exists?(File.dirname(file))
141
95
 
96
+ relative_path = Misc.path_relative_to file, datadir
142
97
  case
143
98
  when get.nil?
144
- FileUtils.cp File.join(sharedir, subdir.to_s, File.basename(file.to_s)), file.to_s
99
+ FileUtils.cp File.join(sharedir, relative_path), file.to_s
100
+ when StringIO === get
101
+ Open.write(file, get.read)
145
102
  when Proc === get
146
103
  Open.write(file, get.call)
147
104
  when TSV === get
@@ -153,10 +110,10 @@ module PKGData
153
110
  rakefile = File.join(sharedir, get.to_s)
154
111
  end
155
112
  produce_with_rake(rakefile, subdir, file)
156
- when String === get
113
+ when (String === get and Open.remote? get)
157
114
  Open.write(file, Open.read(get, :wget_options => {:pipe => true}, :nocache => true))
158
115
  else
159
- raise "Unknown Get: #{get.class}"
116
+ raise "Unknown Get: #{get.class} #{get}"
160
117
  end
161
118
  end
162
119
  end
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/open'
2
+ require 'rbbt/util/misc'
2
3
  require 'rbbt/util/tsv'
3
4
  require 'rbbt/util/log'
4
5
  require 'rbbt/util/cmd'
@@ -52,7 +53,20 @@ module PKGSoftware
52
53
 
53
54
  FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
54
55
 
55
- if get.nil? or get.empty?
56
+ case
57
+ when get == :directory
58
+ FileUtils.mkdir_p File.dirname(path) unless File.exists? File.dirname(path)
59
+ subdir = Misc.path_relative_to File.dirname(path), opt_dir
60
+ source = File.join(sharedir, 'install/software', subdir, pkg)
61
+
62
+ FileUtils.cp_r File.join(sharedir, 'install/software', subdir, pkg), path
63
+ when get == :binary
64
+ FileUtils.mkdir_p File.dirname(path) unless File.exists? File.dirname(path)
65
+ subdir = Misc.path_relative_to File.dirname(path), opt_dir
66
+ source = File.join(sharedir, 'install/software', subdir, pkg)
67
+
68
+ FileUtils.cp File.join(sharedir, 'install/software', subdir, pkg), path
69
+ when (get.nil? or get.empty?)
56
70
  CMD.cmd("#{File.join(sharedir, 'install', 'software', pkg)} #{File.join(Rbbt.rootdir, 'share/install/software/lib', 'install_helpers')} #{software_dir}", :stderr => Log::HIGH)
57
71
  else
58
72
  CMD.cmd("#{File.join(sharedir, 'install', 'software', get)} #{File.join(Rbbt.rootdir, 'share/install/software/lib', 'install_helpers')} #{software_dir}")
@@ -17,7 +17,11 @@ module RakeHelper
17
17
  end
18
18
  def self.define_task(file, *args, &block)
19
19
  @@files ||= []
20
- @@files << file
20
+ if Hash === file
21
+ @@files << file.keys.first.to_s
22
+ else
23
+ @@files << file.to_s
24
+ end
21
25
  old_define_task(file, *args, &block)
22
26
  end
23
27
 
@@ -1,39 +1,143 @@
1
+ require 'rbbt/util/misc'
1
2
  require 'tokyocabinet'
2
3
 
3
4
  class TCHash < TokyoCabinet::HDB
4
5
  class OpenError < StandardError;end
5
6
  class KeyFormatError < StandardError;end
6
7
 
7
- Serializer = Marshal
8
+ class StringSerializer
9
+ def self.dump(str); str.to_s; end
10
+ def self.load(str); str; end
11
+ end
12
+
13
+ class StringArraySerializer
14
+ def self.dump(array)
15
+ array.collect{|a| a.to_s} * "\t"
16
+ end
17
+
18
+ def self.load(string)
19
+ string.split(/\t/)
20
+ end
21
+ end
22
+
23
+ class StringDoubleArraySerializer
24
+ def self.dump(array)
25
+ array.collect{|a| a.collect{|a| a.to_s} * "|"} * "\t"
26
+ end
27
+
28
+ def self.load(string)
29
+ string.split(/\t/).collect{|l| l.split("|")}
30
+ end
31
+ end
32
+
33
+
34
+ ALIAS = {:marshal => Marshal, nil => Marshal, :single => StringSerializer, :list => StringArraySerializer, :double => StringDoubleArraySerializer}
35
+
36
+ CONNECTIONS = {}
8
37
 
9
38
  FIELD_INFO_ENTRIES = {
10
- :fields => '__tokyocabinet_hash_fields',
11
- :key_field => '__tokyocabinet_hash_key_field',
12
- :filename => '__tokyocabinet_hash_filename',
13
- :type => '__tokyocabinet_hash_type',
14
- :case_insensitive => '__tokyocabinet_hash_case_insensitive'
39
+ :type => '__tokyocabinet_hash_type',
40
+ :serializer => '__tokyocabinet_hash_serializer',
41
+ :identifiers => '__tokyocabinet_hash_identifiers',
42
+ :datadir => '__tokyocabinet_hash_datadir',
43
+ :fields => '__tokyocabinet_hash_fields',
44
+ :key_field => '__tokyocabinet_hash_key_field',
45
+ :filename => '__tokyocabinet_hash_filename',
46
+ :namespace => '__tokyocabinet_hash_namspace',
47
+ :type => '__tokyocabinet_hash_type',
48
+ :case_insensitive => '__tokyocabinet_hash_case_insensitive'
15
49
  }
16
- CONNECTIONS = {}
17
50
 
18
51
  FIELD_INFO_ENTRIES.each do |entry, key|
19
52
  class_eval do
20
- define_method entry.to_s, proc{self[key]}
21
- define_method entry.to_s + "=", proc{|value| write unless write?; self[key] = value}
53
+ define_method entry.to_s, proc{v = self.original_get_brackets(key); v.nil? ? nil : Marshal.load(v)}
54
+ define_method entry.to_s + "=", proc{|value| write unless write?; self.original_set_brackets key, Marshal.dump(value)}
55
+ end
56
+ end
57
+
58
+ def serializer
59
+ @serializer
60
+ end
61
+
62
+ def serializer=(value)
63
+ self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer],value) unless value.nil?
64
+ end
65
+
66
+ alias original_open open
67
+ def open(write = false)
68
+ flags = write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER
69
+ if !self.original_open(@path_to_db, flags)
70
+ ecode = self.ecode
71
+ raise OpenError, "Open error: #{self.errmsg(ecode)}. Trying to open file #{@path_to_db}"
72
+ end
73
+
74
+ @write = write
75
+
76
+ if write
77
+ self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], @serializer.to_s) unless @serializer.nil?
78
+ else
79
+ serializer_str = self.original_get_brackets(FIELD_INFO_ENTRIES[:serializer])
80
+
81
+ if serializer_str.nil? or serializer_str.empty?
82
+ @serializer = Marshal
83
+ else
84
+ mod = Misc.string2const serializer_str
85
+ @serializer = mod
86
+ end
87
+ end
88
+ end
89
+
90
+ def write?
91
+ @write
92
+ end
93
+
94
+ def write
95
+ self.close
96
+ self.open(true)
97
+ end
98
+
99
+ def read
100
+ self.close
101
+ self.open(false)
102
+ end
103
+
104
+ def initialize(path, write = false, serializer = Marshal)
105
+ super()
106
+
107
+ serializer = ALIAS[serializer] if ALIAS.include? serializer
108
+
109
+ @path_to_db = path
110
+ @serializer = serializer
111
+
112
+ if write || ! File.exists?(@path_to_db)
113
+ self.open(true)
114
+ else
115
+ self.open(false)
22
116
  end
23
117
  end
24
118
 
119
+ def self.get(path, write = false, serializer = Marshal)
120
+ serializer = ALIAS[serializer] if ALIAS.include? serializer
121
+ @serializer = serializer
122
+ d = CONNECTIONS[path] ||= self.new(path, false, @serializer)
123
+ write ? d.write : d.read
124
+ d
125
+ end
126
+
127
+ #{{{ ACESSORS
128
+
25
129
  alias original_get_brackets []
26
130
  def [](key)
27
131
  return nil unless String === key
28
132
  result = self.original_get_brackets(key)
29
- result ? Serializer.load(result) : nil
133
+ result ? @serializer.load(result) : nil
30
134
  end
31
135
 
32
136
  alias original_set_brackets []=
33
137
  def []=(key,value)
34
138
  raise KeyFormatError, "Key must be a String, its #{key.class.to_s}" unless String === key
35
- write unless write?
36
- self.original_set_brackets(key, Serializer.dump(value))
139
+ raise "Closed TCHash connection" unless write?
140
+ self.original_set_brackets(key, serializer.dump(value))
37
141
  end
38
142
 
39
143
  def values_at(*args)
@@ -57,22 +161,21 @@ class TCHash < TokyoCabinet::HDB
57
161
  indexes = FIELD_INFO_ENTRIES.values.collect do |field| keys.index(field) end.compact.sort.reverse
58
162
  indexes.each do |index| values.delete_at index end
59
163
 
60
- values.collect{|v| Serializer.load(v)}
164
+ values.collect{|v| serializer.load(v)}
61
165
  end
62
166
 
63
167
  # This version of each fixes a problem in ruby 1.9. It also
64
168
  # removes the special entries
65
- def each19(&block)
66
- values = self.original_values.collect{|v| Serializer.load v}
169
+ def each(&block)
170
+ values = self.original_values
67
171
  keys = self.original_keys
68
172
  indexes = FIELD_INFO_ENTRIES.values.collect do |field| keys.index(field) end.compact.sort.reverse
69
173
  indexes.sort.reverse.each do |index| values.delete_at(index); keys.delete_at(index) end
70
174
 
71
- keys.zip(values).each &block
175
+ keys.zip(values.collect{|v| serializer.load v}).each &block
72
176
  end
73
177
 
74
178
  alias original_each each
75
- alias each each19
76
179
 
77
180
  def collect
78
181
  res = []
@@ -81,50 +184,17 @@ class TCHash < TokyoCabinet::HDB
81
184
  end
82
185
 
83
186
  def merge!(data)
84
- new_data = {}
85
- data.each do |key, values|
86
- self[key] = values
87
- end
88
- end
89
-
90
- alias original_open open
91
- def open(write = false)
92
- flags = write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER
93
- if !self.original_open(@path_to_db, flags)
94
- ecode = self.ecode
95
- raise OpenError, "Open error: #{self.errmsg(ecode)}. Trying to open file #{@path_to_db}"
96
- end
97
- @write = write
98
- end
99
-
100
- def write?
101
- @write
102
- end
103
-
104
- def write
105
- self.close
106
- self.open(true)
107
- end
108
-
109
- def read
110
- self.close
111
- self.open(false)
112
- end
113
-
114
- def initialize(path, write = false)
115
- super()
116
- @path_to_db = path
117
-
118
- if write || ! File.exists?(@path_to_db)
119
- self.open(true)
187
+ raise "Closed TCHash connection" unless write?
188
+ serialized =
189
+ data.collect{|key, values| [key.to_s, serializer.dump(values)]}
190
+ if tranbegin
191
+ serialized.each do |key, values|
192
+ self.putasync(key, values)
193
+ end
194
+ trancommit
120
195
  else
121
- self.open(false)
196
+ raise "Transaction cannot initiate"
122
197
  end
123
198
  end
124
199
 
125
- def self.get(path, write = false)
126
- d = CONNECTIONS[path] ||= self.new(path, false)
127
- write ? d.write : d.read
128
- d
129
- end
130
200
  end
data/lib/rbbt/util/tsv.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'rbbt/util/misc'
2
2
  require 'rbbt/util/open'
3
+ require 'rbbt/util/path'
3
4
  require 'rbbt/util/tc_hash'
4
5
  require 'rbbt/util/tmpfile'
5
6
  require 'rbbt/util/log'
@@ -7,1336 +8,160 @@ require 'rbbt/util/persistence'
7
8
  require 'digest'
8
9
  require 'fileutils'
9
10
 
11
+ require 'rbbt/util/tsv/parse'
12
+ require 'rbbt/util/tsv/accessor'
13
+ require 'rbbt/util/tsv/manipulate'
14
+ require 'rbbt/util/tsv/index'
15
+ require 'rbbt/util/tsv/attach'
10
16
  class TSV
11
- class FieldNotFoundError < StandardError;end
12
-
13
- module Field
14
- def ==(string)
15
- return false unless String === string
16
- self.sub(/#.*/,'').casecmp(string.sub(/#.*/,'')) == 0
17
- end
18
- end
19
-
20
- #{{{ Persistence
21
-
22
- CACHEDIR="/tmp/tsv_persistent_cache"
23
- FileUtils.mkdir CACHEDIR unless File.exist? CACHEDIR
24
-
25
- def self.cachedir=(cachedir)
26
- CACHEDIR.replace cachedir
27
- FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
28
- end
29
-
30
- def self.cachedir
31
- CACHEDIR
32
- end
33
-
34
-
35
- #{{{ Headers and Field Stuff
36
17
 
37
18
  def self.headers(file, options = {})
38
- if file =~ /(.*)#(.*)/ and File.exists? $1
39
- options.merge! Misc.string2hash $2
19
+
20
+ ## Remove options from filename
21
+ if String === file and file =~/(.*?)#(.*)/ and File.exists? $1
22
+ options = Misc.add_defaults options, Misc.string2hash($2)
40
23
  file = $1
41
24
  end
42
25
 
43
- options = Misc.add_defaults options, :sep => "\t", :header_hash => "#"
44
- io = Open.open(file)
45
- line = io.gets
46
- io.close
26
+ fields = case
27
+ when Open.can_open?(file)
28
+ Open.open(file, :grep => options[:grep]) do |f| TSV.parse_header(f, options[:sep], options[:header_hash]).values_at(0, 1).flatten end
29
+ when File === file
30
+ file = Open.grep(file, options[:grep]) if options[:grep]
31
+ TSV.parse_header(file, options[:sep], options[:header_hash]).values_at(0, 1).flatten
32
+ else
33
+ raise "File #{file.inspect} not found"
34
+ end
47
35
 
48
- if line =~ /^#{options[:header_hash]}/
49
- line.chomp.sub(/^#{options[:header_hash]}/,'').split(options[:sep])
50
- else
36
+ if fields.compact.empty?
51
37
  nil
52
- end
53
- end
54
-
55
- def self.fields_include(key_field, fields, field)
56
- return true if key_field == field or fields.include? field
57
- return false
58
- end
59
-
60
- def self.field_positions(key_field, fields, *selected)
61
- selected.collect do |sel|
62
- case
63
- when (sel.nil? or sel == :main or sel == key_field)
64
- -1
65
- when Integer === sel
66
- sel
67
- else
68
- Misc.field_position fields, sel
69
- end
70
- end
71
- end
72
-
73
- def fields_include(field)
74
- return TSV.fields_include key_field, fields, field
75
- end
76
-
77
- def field_positions(*selected)
78
- return nil if selected.nil? or selected == [nil]
79
- TSV.field_positions(key_field, fields, *selected)
80
- end
81
-
82
- def fields_at(*positions)
83
- return nil if fields.nil?
84
- return nil if positions.nil? or positions == [nil]
85
- (fields + [key_field]).values_at(*positions)
86
- end
87
-
88
- #{{{ Iteration, Merging, etc
89
- def through(new_key_field = nil, new_fields = nil, &block)
90
- new_key_position = (field_positions(new_key_field) || [-1]).first
91
- new_fields = [new_fields] if String === new_fields
92
-
93
- if new_key_position == -1
94
-
95
- if new_fields.nil? or new_fields == fields
96
- each &block
97
- return [key_field, fields]
98
- else
99
- new_field_positions = field_positions(*new_fields)
100
- each do |key, values|
101
- if values.nil?
102
- yield key, nil
103
- else
104
- yield key, values.values_at(*new_field_positions)
105
- end
106
- end
107
- return [key_field, fields_at(*new_field_positions)]
108
- end
109
-
110
- else
111
- new_field_positions = field_positions(*new_fields)
112
-
113
- new_field_names = fields_at(*new_field_positions)
114
- if new_field_names.nil? and fields
115
- new_field_names = fields.dup
116
- new_field_names.delete_at new_key_position
117
- new_field_names.unshift key_field
118
- end
119
-
120
- each do |key, values|
121
- if type == :double
122
- tmp_values = values + [[key]]
123
- else
124
- tmp_values = values + [key]
125
- end
126
-
127
- if new_field_positions.nil?
128
- new_values = values.dup
129
- new_values.delete_at new_key_position
130
- new_values.unshift [key]
131
- else
132
- new_values = tmp_values.values_at(*new_field_positions)
133
- end
134
-
135
- if not Array === tmp_values[new_key_position]
136
- yield tmp_values[new_key_position], NamedArray.name(new_values, new_field_names)
137
- else
138
- tmp_values[new_key_position].each do |new_key|
139
- if new_field_names
140
- yield new_key, NamedArray.name(new_values, new_field_names)
141
- else
142
- yield new_key, new_values
143
- end
144
- end
145
- end
146
- end
147
- return [(fields_at(new_key_position) || [nil]).first, new_field_names]
148
- end
149
- end
150
-
151
- def process(field)
152
- through do |key, values|
153
- values[field].replace yield(values[field], key, values) unless values[field].nil?
154
- end
155
- end
156
-
157
-
158
- def reorder(new_key_field, new_fields = nil, options = {})
159
- options = Misc.add_defaults options
160
- return TSV.new(Persistence::TSV.get(options[:persistence_file], false), :case_insensitive => case_insensitive) if options[:persistence_file] and File.exists?(options[:persistence_file])
161
-
162
- new = {}
163
- new_key_field, new_fields = through new_key_field, new_fields do |key, values|
164
- if new[key].nil?
165
- new[key] = values
166
- else
167
- new[key] = new[key].zip(values)
168
- end
169
- end
170
-
171
- new.each do |key,values|
172
- values.each{|list| list.flatten! if Array === list}
173
- end
174
-
175
- if options[:persistence_file]
176
- reordered = TSV.new(Persistence::TSV.get(options[:persistence_file], false), :case_insensitive => case_insensitive)
177
- reordered.merge! new
178
38
  else
179
- reordered = TSV.new(new, :case_insensitive => case_insensitive)
39
+ fields
180
40
  end
181
-
182
- reordered.key_field = new_key_field
183
- reordered.fields = new_fields
184
-
185
- reordered
186
41
  end
187
42
 
188
- def slice(new_fields, options = {})
189
- reorder(:main, new_fields)
43
+ def self.encapsulate_persistence(file, options)
190
44
  end
191
45
 
192
- def add_field(name = nil)
193
- each do |key, values|
194
- self[key] = values + [yield(key, values)]
195
- end
196
-
197
- if fields != nil
198
- new_fields = fields + [name]
199
- self.fields = new_fields
200
- end
201
- end
202
-
203
- def select(method)
204
- new = TSV.new({})
205
- new.key_field = key_field
206
- new.fields = fields.dup
207
- new.type = type
208
- new.filename = filename + "#Select: #{method.inspect}"
209
- new.case_insensitive = case_insensitive
46
+ def initialize(file = {}, type = nil, options = {})
47
+ # Process Options
210
48
 
211
- case
212
- when Array === method
213
- through do |key, values|
214
- new[key] = values if ([key,values].flatten & method).any?
215
- end
216
- when Regexp === method
217
- through do |key, values|
218
- new[key] = values if [key,values].flatten.select{|v| v =~ method}.any?
219
- end
220
- when String === method
221
- through do |key, values|
222
- new[key] = values if [key,values].flatten.select{|v| v == method}.any?
223
- end
224
- when Hash === method
225
- key = method.keys.first
226
- method = method.values.first
227
- case
228
- when (Array === method and (:main == key or key_field == key))
229
- method.each{|item| if values = self[item]; then new[item] = values; end}
230
- when Array === method
231
- through :main, key do |key, values|
232
- new[key] = self[key] if (values.flatten & method).any?
233
- end
234
- when Regexp === method
235
- through :main, key do |key, values|
236
- new[key] = self[key] if values.flatten.select{|v| v =~ method}.any?
237
- end
238
- when String === method
239
- through :main, key do |key, values|
240
- new[key] = self[key] if values.flatten.select{|v| v == method}.any?
241
- end
242
- end
243
- end
244
-
245
-
246
- new
247
- end
248
-
249
- def index(options = {})
250
- options = Misc.add_defaults options, :order => false, :persistence => false
251
-
252
- new, extra = Persistence.persist(filename, :Index, :tsv, options) do |filename, options|
253
- new = {}
254
- if options[:order]
255
- new_key_field, new_fields = through options[:target], options[:others] do |key, values|
256
-
257
- values.each_with_index do |list, i|
258
- next if list.nil? or list.empty?
259
-
260
- list = [list] unless Array === list
261
-
262
- list.each do |value|
263
- next if value.nil? or value.empty?
264
- value = value.downcase if options[:case_insensitive]
265
- new[value] ||= []
266
- new[value][i + 1] ||= []
267
- new[value][i + 1] << key
268
- end
269
- new[key] ||= []
270
- new[key][0] = key
271
- end
272
-
273
- end
274
-
275
- new.each do |key, values|
276
- values.flatten!
277
- values.compact!
278
- end
279
-
280
- else
281
- new_key_field, new_fields = through options[:target], options[:others] do |key, values|
282
- new[key] ||= []
283
- new[key] << key
284
- values.each do |list|
285
- next if list.nil?
286
- if Array === list
287
- list.each do |value|
288
- value = value.downcase if options[:case_insensitive]
289
- new[value] ||= []
290
- new[value] << key
291
- end
292
- else
293
- next if list.empty?
294
- value = list
295
- value = value.downcase if options[:case_insensitive]
296
- new[value] ||= []
297
- new[value] << key
298
- end
299
- end
300
- end
301
- end
302
-
303
- [new, {:key_field => new_key_field, :fields => new_fields, :type => :double, :case_insensitive => options[:case_insensitive]}]
304
- end
305
-
306
- new = TSV.new(new)
307
- new.filename = "Index: " + filename + options.inspect
308
- new.fields = extra[:fields]
309
- new.key_field = extra[:key_field]
310
- new.case_insensitive = extra[:case_insensitive]
311
- new.type = extra[:type]
312
- new
313
- end
314
-
315
- def smart_merge(other, match = nil, new_fields = nil)
316
-
317
- new_fields = [new_fields] if String === new_fields
318
- if self.fields and other.fields
319
- common_fields = ([self.key_field] + self.fields) & ([other.key_field] + other.fields)
320
- new_fields ||= ([other.key_field] + other.fields) - ([self.key_field] + self.fields)
321
-
322
- common_fields.delete match if String === match
323
- common_fields.delete_at match if Integer === match
324
-
325
- this_common_field_positions = self.field_positions *common_fields
326
- other_common_field_positions = other.field_positions *common_fields
327
- other_new_field_positions = other.field_positions *new_fields
328
- else
329
- nofieldinfo = true
330
- end
331
-
332
- case
333
- when TSV === match
334
- match_index = match
335
- matching_code_position = nil
336
-
337
- when Array === match
338
- match_index = match.first
339
- matching_code_position = field_positions(match.last).first
340
-
341
- when match =~ /^through:(.*)/
342
- through = $1
343
- if through =~ /(.*)#using:(.*)/
344
- through = $1
345
- matching_code_position = field_positions($2).first
346
- else
347
- matching_code_position = nil
348
- end
349
- index_fields = TSV.headers(through)
350
- target_field = index_fields.select{|field| other.fields_include field}.first
351
- Log.debug "Target Field: #{ target_field }"
352
- match_index = TSV.open_file(through).index(:field => target_field)
353
-
354
- when field_positions(match).first
355
- matching_code_position = field_positions(match).first
356
- match_index = nil
357
- end
358
-
359
- if matching_code_position.nil? and match_index.fields
360
- match_index.fields.each do |field|
361
- if matching_code_position = field_positions(field).first
362
- break
363
- end
364
- end
365
- end
366
-
367
- if match_index and match_index.key_field == other.key_field
368
- other_index = nil
369
- else
370
- other_index = (match === String and other.fields_include(match)) ?
371
- other.index(:other => match, :order => true) : other.index(:order => true)
372
- end
373
-
374
- each do |key,values|
375
- Log.debug "Key: #{ key }. Values: #{values * ", "}"
376
- if matching_code_position.nil? or matching_code_position == -1
377
- matching_codes = [key]
378
- else
379
- matching_codes = values[matching_code_position]
380
- matching_codes = [matching_codes] unless matching_codes.nil? or Array === matching_codes
381
- end
382
- Log.debug "Matching codes: #{matching_codes}"
383
-
384
- next if matching_codes.nil?
385
-
386
- matching_codes.each do |matching_code|
387
- if match_index
388
- if match_index[matching_code]
389
- matching_code_fix = match_index[matching_code].first
390
- else
391
- matching_code_fix = nil
392
- end
393
- else
394
- matching_code_fix = matching_code
395
- end
396
-
397
- Log.debug "Matching code (fix): #{matching_code_fix}"
398
- next if matching_code_fix.nil?
399
-
400
- if other_index
401
- Log.debug "Using other_index"
402
- other_codes = other_index[matching_code_fix]
403
- else
404
- other_codes = matching_code_fix
405
- end
406
- Log.debug "Other codes: #{other_codes}"
407
-
408
- next if other_codes.nil? or other_codes.empty?
409
- other_code = other_codes.first
410
-
411
- if nofieldinfo
412
- next if other[other_code].nil?
413
- if type == :double
414
- other_values = [[other_code]] + other[other_code]
415
- else
416
- other_values = [other_code] + other[other_code]
417
- end
418
- other_values.delete_if do |list|
419
- list = [list] unless Array === list
420
- list.collect{|e| case_insensitive ? e.downcase : e }.
421
- select{|e| case_insensitive ? e == matching_code.downcase : e == matching_code }.any?
422
- end
423
-
424
- new_values = values + other_values
425
- else
426
- if other[other_code].nil?
427
- if type == :double
428
- other_values = [[]] * other.fields.length
429
- else
430
- other_values = [] * other.fields.length
431
- end
432
- else
433
- if type == :double
434
- other_values = other[other_code] + [[other_code]]
435
- else
436
- other_values = other[other_code] + [other_code]
437
- end
438
- end
439
-
440
-
441
- new_values = values.dup
442
-
443
- if type == :double
444
- this_common_field_positions.zip(other_common_field_positions).each do |tpos, opos|
445
- new_values_tops = new_values[tpos]
446
-
447
- if other.type == :double
448
- new_values_tops += other_values[opos]
449
- else
450
- new_values_tops += [other_values[opos]]
451
- end
452
-
453
- new_values[tpos] = new_values_tops.uniq
454
- end
455
- end
456
-
457
- new_values.concat other_values.values_at *other_new_field_positions
458
- end
459
-
460
- self[key] = new_values
461
- end
462
- end
463
-
464
- self.fields = self.fields + new_fields unless nofieldinfo
465
- end
466
-
467
-
468
- def self.field_matches(tsv, values)
469
- if values.flatten.sort[0..9].compact.collect{|n| n.to_i} == (1..10).to_a
470
- return {}
471
- end
472
-
473
- key_field = tsv.key_field
474
- fields = tsv.fields
475
-
476
- field_values = {}
477
- fields.each{|field|
478
- field_values[field] = []
479
- }
480
-
481
- tsv.through do |key,entry_values|
482
- fields.zip(entry_values).each do |field,entry_field_values|
483
- field_values[field].concat entry_field_values
484
- end
485
- end
486
-
487
- field_values.each do |field,field_value_list|
488
- field_value_list.replace(values & field_value_list.flatten.uniq)
489
- end
490
-
491
- field_values[key_field] = values & tsv.keys
492
-
493
- field_values
494
- end
495
-
496
- def field_matches(values)
497
- TSV.field_matches(self, values)
498
- end
499
-
500
-
501
-
502
- #{{{ Helpers
503
-
504
- def self.index(file, options = {})
505
- options = Misc.add_defaults options, :data_persistence => true, :persistence => true
506
- persistence, persistence_file = Misc.process_options options, :persistence, :persistence_file
507
- options[:persistence], options[:persistence_file] = options.values_at :data_persistence, :data_persistence_file
508
- options.delete :data_persistence
509
- options.delete :data_persistence_file
510
-
511
- index, extra = Persistence.persist(file, :Index, :tsv, options) do |file, options, filename|
512
- TSV.new(file, :double, options).index
513
- end
514
- index
515
- end
516
-
517
- def self.index2(file, options = {})
518
- opt_data = options.dup
519
- opt_index = options.dup
520
- opt_data.delete :field
521
- opt_data.delete :persistence
522
- opt_index.delete :persistence
523
-
524
- opt_data[:persistence] = true if options[:data_persistence]
525
-
526
- opt_index.merge! :persistence_file => get_persistence_file(file, "index:#{ file }_#{options[:field]}:", opt_index) if options[:persistence]
527
-
528
- if ! opt_index[:persistence_file].nil? && File.exists?(opt_index[:persistence_file])
529
- Log.low "Reloading persistent index for #{ file }: #{opt_index[:persistence_file]}"
530
- TSV.new(Persistence::TSV.get(opt_index[:persistence_file], false), opt_index)
531
- else
532
- Log.low "Creating index for #{ file }: #{opt_index[:persistence_file]}"
533
- data = TSV.new(file, opt_data)
534
- data.index(opt_index)
535
- end
536
- end
537
-
538
- def self.open_file(file)
539
- if file =~ /(.*?)#(.*)/
540
- file, options = $1, Misc.string2hash($2.to_s)
541
- else
542
- options = {}
543
- end
544
-
545
- TSV.new(file, options)
546
- end
547
-
548
- #{{{ Accesor Methods
549
- attr_accessor :filename, :type, :case_insensitive, :key_field, :fields, :data
550
-
551
- def fields
552
- return nil if @fields.nil?
553
- fields = @fields
554
- fields.each do |f| f.extend Field end if Array === fields
555
- fields
556
- end
557
-
558
- def fields=(new_fields)
559
- @fields = new_fields
560
- if Persistence::TSV === @data
561
- @data.fields = new_fields
562
- end
563
- end
564
-
565
-
566
-
567
- def keys
568
- @data.keys
569
- end
570
-
571
- def values
572
- @data.values
573
- end
574
-
575
- def size
576
- @data.size
577
- end
578
-
579
- # Write
580
-
581
- def []=(key, value)
582
- key = key.downcase if @case_insensitive
583
- @data[key] = value
584
- end
585
-
586
-
587
- def merge!(new_data)
588
- new_data.each do |key, value|
589
- self[key] = value
590
- end
591
- end
592
-
593
- # Read
594
-
595
- def follow(value)
596
- return nil if value.nil?
597
- if String === value && value =~ /__Ref:(.*)/
598
- return self[$1]
599
- else
600
- value = NamedArray.name value, fields if Array === value and fields
601
- value
602
- end
603
- end
604
-
605
- def [](key)
606
- if Array === key
607
- return @data[key] if @data[key] != nil
608
- key.each{|k| v = self[k]; return v unless v.nil?}
609
- return nil
610
- end
611
-
612
- key = key.downcase if @case_insensitive and key !~ /^__Ref:/
613
- follow @data[key]
614
- end
615
-
616
- def values_at(*keys)
617
- keys.collect{|k|
618
- self[k]
619
- }
620
- end
621
-
622
- def each(&block)
623
- @data.each do |key, value|
624
- block.call(key, follow(value))
625
- end
626
- end
627
-
628
- def collect
629
- if block_given?
630
- @data.collect do |key, value|
631
- value = follow(value)
632
- key, values = yield key, value
633
- end
634
- else
635
- @data.collect do |key, value|
636
- [key, follow(value)]
637
- end
638
- end
639
- end
640
-
641
- def sort(&block)
642
- collect.sort(&block).collect{|p|
643
- key, value = p
644
- value = NamedArray.name value, fields if fields
645
- [key, value]
646
- }
647
- end
648
-
649
- def sort_by(&block)
650
- collect.sort_by &block
651
- end
652
-
653
- def values_to_s(values)
654
- case
655
- when (values.nil? and fields.nil?)
656
- "\n"
657
- when (values.nil? and not fields.nil?)
658
- "\t" << ([""] * fields.length) * "\t" << "\n"
659
- when (not Array === values)
660
- "\t" << values.to_s << "\n"
661
- when Array === values.first
662
- "\t" << values.collect{|list| (list || []) * "|"} * "\t" << "\n"
663
- else
664
- "\t" << values * "\t" << "\n"
665
- end
666
- end
667
-
668
- def to_s(keys = nil)
669
- str = ""
670
-
671
- if fields
672
- str << "#" << key_field << "\t" << fields * "\t" << "\n"
673
- end
674
-
675
- if keys.nil?
676
- each do |key, values|
677
- key = key.to_s if Symbol === key
678
- str << key.dup << values_to_s(values)
679
- end
680
- else
681
- keys.zip(values_at(*keys)).each do |key, values|
682
- key = key.to_s if Symbol === key
683
- str << key.dup << values_to_s(values)
684
- end
685
- end
686
-
687
- str
688
- end
689
-
690
- #{{{ Parsing
691
-
692
- def self.parse_fields(io, delimiter = "\t")
693
- return [] if io.nil?
694
- fields = io.split(delimiter, -1)
695
- fields
696
- end
697
-
698
- def self.zip_fields(list, fields = nil)
699
- return [] if list.nil? || list.empty?
700
- fields ||= list.fields if list.respond_to? :fields
701
- zipped = list[0].zip(*list[1..-1])
702
- zipped = zipped.collect{|v| NamedArray.name(v, fields)} if fields
703
- zipped
704
- end
705
-
706
- def self.key_order(file, options = {})
707
- # Prepare options
708
- options = add_defaults options,
709
- :sep => "\t",
710
- :sep2 => "|",
711
- :native => 0,
712
- :fix => nil,
713
- :exclude => nil,
714
- :select => nil,
715
- :grep => nil,
716
- :case_insensitive => false,
717
- :header_hash => '#'
718
-
719
- options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
720
-
721
- if String === file and File.exists? file
722
- file = File.open(file)
723
- end
724
-
725
- #{{{ Process first line
726
-
727
- line = file.gets
728
- raise "Empty content" if line.nil?
729
- line.chomp!
730
-
731
- if line =~ /^#{options[:header_hash]}/
732
- header_fields = parse_fields(line, options[:sep])
733
- header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
734
- line = file.gets
735
- else
736
- header_fields = nil
737
- end
738
-
739
- id_pos = Misc.field_position(header_fields, options[:native])
740
-
741
- if options[:extra].nil?
742
- extra_pos = nil
743
- max_cols = 0
744
- else
745
- extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
746
- end
747
-
748
- ids = []
749
- #{{{ Process rest
750
- while line do
751
- line.chomp!
752
-
753
- line = options[:fix].call line if options[:fix]
754
- break if not line
755
-
756
- # Select and fix lines
757
- if line.empty? or
758
- (options[:exclude] and options[:exclude].call(line)) or
759
- (options[:select] and not options[:select].call(line))
760
-
761
- line = file.gets
762
- next
763
- end
764
-
765
- ### Process line
766
-
767
- # Chunk fields
768
- parts = parse_fields(line, options[:sep])
769
-
770
- # Get next line
771
- line = file.gets
772
-
773
- # Get id field
774
- next if parts[id_pos].nil? || parts[id_pos].empty?
775
- ids << parts[id_pos]
776
- end
777
-
778
- ids
779
- end
780
-
781
- def self.parse_header(stream, sep, header_hash)
782
- fields, key_field = nil
783
- options = {}
784
-
785
- line = stream.gets
786
-
787
- if line and line =~ /^#{header_hash}: (.*)/
788
- options = Misc.string2hash $1
789
- line = stream.gets
790
- end
791
-
792
- sep = options[:sep] if options[:sep]
793
-
794
- if line and line =~ /^#{header_hash}/
795
- line.chomp!
796
- fields = parse_fields(line, sep)
797
- key_field = fields.shift
798
- key_field = key_field[(0 + header_hash.length)..-1] # Remove initial hash character
799
- line = stream.gets
800
- end
801
-
802
- raise "Empty content" if line.nil?
803
- return key_field, fields, options, line
804
- end
805
-
806
- def self.parse(stream, options = {})
807
- # Prepare options
808
- options = Misc.add_defaults options,
809
- :case_insensitive => false,
810
- :type => :double,
811
-
812
- :merge => false,
813
- :keep_empty => true,
814
- :cast => nil,
815
-
816
- :sep => "\t",
817
- :sep2 => "|",
818
- :header_hash => '#',
819
-
820
- :key => 0,
821
- :fields => nil,
822
-
823
- :fix => nil,
824
- :exclude => nil,
825
- :select => nil,
826
- :grep => nil
827
-
828
-
829
- sep, header_hash =
830
- Misc.process_options options, :sep, :header_hash
831
-
832
- key_field, other_fields, more_options, line = TSV.parse_header(stream, sep, header_hash)
833
-
834
- sep = more_options[:sep] if more_options[:sep]
835
- options = Misc.add_defaults options, more_options
836
- sep2 = Misc.process_options options, :sep2
837
-
838
- key, others =
839
- Misc.process_options options, :key, :others
840
-
841
- if key_field.nil?
842
- key_pos = key
843
- key_field, fields = nil
844
- else
845
- all_fields = [key_field].concat other_fields
846
-
847
- key_pos = Misc.field_position(all_fields, key)
848
-
849
- if String === others or Symbol === others
850
- others = [others]
851
- end
852
-
853
- if others.nil?
854
- other_pos = (0..(all_fields.length - 1)).to_a
855
- other_pos.delete key_pos
856
- else
857
- other_pos = Misc.field_position(all_fields, *others)
858
- end
859
-
860
- key_field = all_fields[key_pos]
861
- fields = all_fields.values_at *other_pos
862
- end
863
-
864
- case_insensitive, type, merge, keep_empty, cast =
865
- Misc.process_options options, :case_insensitive, :type, :merge, :keep_empty, :cast
866
- fix, exclude, select, grep =
867
- Misc.process_options options, :fix, :exclude, :select, :grep
868
-
869
- #{{{ Process rest
870
- data = {}
871
- single = type.to_sym != :double
872
- max_cols = 0
873
- while line do
874
- line.chomp!
875
-
876
- line = fix.call line if fix
877
- break if not line
878
-
879
- if header_hash and line =~ /^#{header_hash}/
880
- line = stream.gets
881
- next
882
- end
883
-
884
- if line.empty? or
885
- (exclude and exclude.call(line)) or
886
- (select and not select.call(line))
887
-
888
- line = stream.gets
889
- next
890
- end
891
-
892
- # Chunk fields
893
- parts = parse_fields(line, sep)
894
-
895
- # Get next line
896
- line = stream.gets
897
-
898
- # Get id field
899
- next if parts[key_pos].nil? || parts[key_pos].empty?
900
-
901
- if single
902
- ids = parse_fields(parts[key_pos], sep2)
903
- ids.collect!{|id| id.downcase} if case_insensitive
904
-
905
- id = ids.shift
906
- ids.each do |id2| data[id2] = "__Ref:#{id}" end
907
-
908
- if key_field.nil?
909
- other_pos = (0..(parts.length - 1)).to_a
910
- other_pos.delete key_pos
911
- end
912
-
913
- extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2).first}
914
- extra.collect! do |elem|
915
- case
916
- when String === cast
917
- elem.send(cast)
918
- when Proc === cast
919
- cast.call elem
920
- end
921
- end if cast
922
-
923
- max_cols = extra.size if extra.size > (max_cols || 0)
924
- case type
925
- when :list
926
- data[id] = extra unless data.include? id
927
- when :flat
928
- data[id] = extra.flatten unless data.include? id
929
- when :single
930
- data[id] = extra.flatten.first unless data.include? id
931
- end
932
-
933
- else
934
- ids = parse_fields(parts[key_pos], sep2)
935
- ids.collect!{|id| id.downcase} if case_insensitive
936
-
937
- id = ids.shift
938
- ids.each do |id2| data[id2] = "__Ref:#{id}" end
939
-
940
- if key_field.nil?
941
- other_pos = (0..(parts.length - 1)).to_a
942
- other_pos.delete key_pos
943
- end
944
-
945
- extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}
946
- extra.collect! do |list|
947
- case
948
- when String === cast
949
- list.collect{|elem| elem.send(cast)}
950
- when Proc === cast
951
- list.collect{|elem| cast.call elem}
952
- end
953
- end if cast
954
-
955
- max_cols = extra.size if extra.size > (max_cols || 0)
956
- if merge
957
- data[id] = extra unless data.include? id
958
- else
959
- if not data.include? id
960
- data[id] = extra
961
- else
962
- entry = data[id]
963
- while entry =~ /__Ref:(.*)/ do entry = data[$1] end
964
- extra.each_with_index do |f, i|
965
- if f.empty?
966
- next unless keep_empty
967
- f= [""]
968
- end
969
- entry[i] ||= []
970
- entry[i] = entry[i].concat f
971
- end
972
- data[id] = entry
973
- end
974
- end
975
- end
976
- end
977
-
978
- if keep_empty and max_cols > 0
979
- data.each do |key, values|
980
- next if values =~ /__Ref:/
981
- new_values = values
982
- max_cols.times do |i|
983
- if type == :double
984
- new_values[i] = [""] if new_values[i].nil? or new_values[i].empty?
985
- else
986
- new_values[i] = "" if new_values[i].nil?
987
- end
988
- end
989
- data[key] = new_values
990
- end
991
- end
992
-
993
- [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive}]
994
- end
995
-
996
- def self.parse2(data, file, options = {})
997
-
998
- # Prepare options
999
- options = Misc.add_defaults options,
1000
- :sep => "\t",
1001
- :sep2 => "|",
1002
- :native => 0,
1003
- :extra => nil,
1004
- :fix => nil,
1005
- :exclude => nil,
1006
- :select => nil,
1007
- :grep => nil,
1008
- :single => false,
1009
- :unique => false,
1010
- :merge => false,
1011
- :flatten => false,
1012
- :keep_empty => true,
1013
- :case_insensitive => false,
1014
- :header_hash => '#' ,
1015
- :cast => nil,
1016
- :persistence_file => nil
1017
-
1018
-
1019
- options[:unique] = options[:uniq] if options[:unique].nil?
1020
- options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
1021
- options[:flatten] = true if options[:single]
1022
-
1023
- #{{{ Process first line
1024
-
1025
- line = file.gets
1026
- raise "Empty content" if line.nil?
1027
- line.chomp!
1028
-
1029
- if line =~ /^#{options[:header_hash]}/
1030
- header_fields = parse_fields(line, options[:sep])
1031
- header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
1032
- line = file.gets
1033
- else
1034
- header_fields = nil
1035
- end
1036
-
1037
- id_pos = Misc.field_position(header_fields, options[:native])
1038
-
1039
- if options[:extra].nil?
1040
- extra_pos = nil
1041
- max_cols = 0
1042
- else
1043
- extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
1044
- end
1045
-
1046
- #{{{ Process rest
1047
- while line do
1048
- line.chomp!
1049
-
1050
- line = options[:fix].call line if options[:fix]
1051
- break if not line
1052
-
1053
- if options[:header_hash] && line =~ /^#{options[:header_hash]}/
1054
- line = file.gets
1055
- next
1056
- end
1057
-
1058
- # Select and fix lines
1059
- if line.empty? or
1060
- (options[:exclude] and options[:exclude].call(line)) or
1061
- (options[:select] and not options[:select].call(line))
1062
-
1063
- line = file.gets
1064
- next
1065
- end
1066
-
1067
- ### Process line
1068
-
1069
- # Chunk fields
1070
- parts = parse_fields(line, options[:sep])
1071
-
1072
- # Get next line
1073
- line = file.gets
1074
-
1075
- # Get id field
1076
- next if parts[id_pos].nil? || parts[id_pos].empty?
1077
- ids = parse_fields(parts[id_pos], options[:sep2])
1078
- ids.collect!{|id| id.downcase } if options[:case_insensitive]
1079
-
1080
- # Get extra fields
1081
-
1082
- if options[:extra].nil? and not (options[:flatten] or options[:single])
1083
- extra = parts
1084
- extra.delete_at(id_pos)
1085
- max_cols = extra.size if extra.size > (max_cols || 0)
1086
- else
1087
- if extra_pos.nil?
1088
- extra = parts
1089
- extra.delete_at id_pos
1090
- else
1091
- extra = parts.values_at(*extra_pos)
1092
- end
1093
- end
1094
-
1095
- extra.collect!{|value| parse_fields(value, options[:sep2])}
1096
- extra.collect!{|values| values.first} if options[:unique]
1097
- extra.flatten! if options[:flatten]
1098
- extra = extra.first if options[:single]
1099
-
1100
- if options[:cast]
1101
- if Array === extra[0]
1102
- e = extra
1103
- else
1104
- e = [extra]
1105
- end
1106
-
1107
- e.each do |list|
1108
- case
1109
- when String === options[:cast]
1110
- list.collect!{|elem| elem.send(options[:cast])}
1111
- when Proc === options[:cast]
1112
- list.collect!{|elem| options[:cast].call elem}
1113
- end
1114
- end
1115
- end
1116
-
1117
- main_entry = ids.shift
1118
- ids.each do |id| data[id] = "__Ref:#{main_entry}" end
1119
-
1120
- case
1121
- when (options[:single] or options[:unique] or not options[:merge])
1122
- data[main_entry] = extra unless data.include? main_entry
1123
- when options[:flatten]
1124
- entry = data[main_entry]
1125
-
1126
- if entry.nil?
1127
- data[main_entry] = extra
1128
- else
1129
- while entry =~ /__Ref:(.*)/ do entry = data[$1] end
1130
- if Persistence::TSV === data
1131
- data[main_entry] = entry.concat extra
1132
- else
1133
- data[main_entry].concat extra
1134
- end
1135
- end
1136
- else
1137
- entry = data[main_entry]
1138
- if entry.nil?
1139
- data[main_entry] = extra
1140
- else
1141
- while entry =~ /__Ref:(.*)/ do entry = data[$1] end
1142
- extra.each_with_index do |fields, i|
1143
- if fields.empty?
1144
- next unless options[:keep_empty]
1145
- fields = [""]
1146
- end
1147
- entry[i] ||= []
1148
- entry[i] = entry[i].concat fields
1149
- end
1150
- data[main_entry] = entry
1151
- end
1152
- end
1153
- end
1154
-
1155
- if options[:keep_empty] and not max_cols.nil?
1156
- data.each do |key,values|
1157
- new_values = values
1158
- max_cols.times do |i|
1159
- new_values[i] ||= [""]
1160
- end
1161
- data[key] = new_values
1162
- end
1163
- end
1164
-
1165
- # Save header information
1166
- key_field = nil
1167
- fields = nil
1168
- if header_fields && header_fields.any?
1169
- key_field = header_fields[id_pos]
1170
- if extra_pos.nil?
1171
- fields = header_fields
1172
- fields.delete_at(id_pos)
1173
- else
1174
- fields = header_fields.values_at(*extra_pos)
1175
- end
1176
- end
1177
-
1178
- data.read if Persistence::TSV === data
1179
-
1180
- [key_field, fields]
1181
- end
1182
- def initialize(file = {}, type = :double, options = {})
1183
49
  if Hash === type
1184
50
  options = type
1185
- type = :double
51
+ type = nil
1186
52
  end
1187
53
 
54
+ ## Remove options from filename
1188
55
  if String === file and file =~/(.*?)#(.*)/ and File.exists? $1
1189
56
  options = Misc.add_defaults options, Misc.string2hash($2)
1190
57
  file = $1
1191
58
  end
1192
59
 
1193
- options = Misc.add_defaults options, :persistence => false, :case_insensitive => false, :type => type
60
+ options = Misc.add_defaults options, :persistence => false, :type => type
61
+
62
+ # Extract Filename
63
+
64
+ file, extra = file if Array === file and file.length == 2 and Hash === file.last
1194
65
 
1195
66
  @filename = Misc.process_options options, :filename
1196
67
  @filename ||= case
68
+ when Path === file
69
+ file
1197
70
  when (String === file and File.exists? file)
1198
71
  File.expand_path file
72
+ when String === file
73
+ file
1199
74
  when File === file
1200
75
  File.expand_path file.path
76
+ when TSV === file
77
+ File.expand_path file.filename
78
+ when (Persistence::TSV === file and file.filename)
79
+ File.expand_path file.filename
1201
80
  else
1202
- Digest::MD5.hexdigest(file.inspect)
81
+ file.class.to_s
1203
82
  end
1204
83
 
1205
- if block_given?
1206
- @data, extra = Persistence.persist(@filename, :TSV, :tsv, options) do |filename, options| yield filename, options end
84
+ # Process With Persistence
85
+ # Use filename to identify the persistence
86
+ # Several inputs supported
87
+ # Filename or File: Parsed
88
+ # Hash: Encapsulated, empty info
89
+ # TSV: Duplicate
90
+ case
91
+ when block_given?
92
+ @data, extra = Persistence.persist(@filename, :TSV, :tsv_extra, options.merge(:force_array => true)) do |filename, options| yield filename, options end
93
+ extra.each do |key, values|
94
+ self.send("#{ key }=".to_sym, values) if self.respond_to? "#{ key }=".to_sym
95
+ end if not extra.nil?
96
+
1207
97
  else
1208
- @data, extra = Persistence.persist(@filename, :TSV, :tsv, options) do |filename, options|
1209
- data, extra = nil
1210
- case
1211
- when String === file
1212
- File.open(file) do |f|
1213
- data, extra = TSV.parse(f, options)
98
+ case
99
+ when Hash === file
100
+ @data = file
101
+ when TSV === file
102
+ @data = file.data
103
+ when Persistence::TSV === file
104
+ @data = file
105
+ %w(case_insensitive namespace datadir fields key_field type filename cast).each do |key|
106
+ if @data.respond_to?(key.to_sym) and self.respond_to?("#{key}=".to_sym)
107
+ self.send "#{key}=".to_sym, @data.send(key.to_sym)
1214
108
  end
1215
- when File === file
1216
- data, extra = TSV.parse(file, options)
1217
- when Hash === file
1218
- data = file
1219
- extra = {:case_insensitive => options[:case_insensitive], :type => type}
1220
109
  end
1221
-
1222
- [data, extra]
1223
- end
1224
- end
1225
-
1226
- @type = extra[:type]
1227
- @key_field = extra[:key_field]
1228
- @fields = extra[:fields]
1229
- @case_insensitive = extra[:case_insensitive]
1230
- end
1231
-
1232
- def initialize2(file = {}, options = {})
1233
- options = Misc.add_defaults options
1234
- options[:persistence] = true if options[:persistence_file]
1235
-
1236
- if String === file && file =~ /(.*?)#(.*)/
1237
- file, file_options = $1, $2
1238
- options = Misc.add_defaults file_options, options
1239
- end
1240
-
1241
- @case_insensitive = options[:case_insensitive] == true
1242
- @list = ! (options[:flatten] == true || options[:single] == true || options[:unique] == true)
1243
-
1244
- case
1245
- when TSV === file
1246
- Log.low "Copying TSV"
1247
- @filename = file.filename
1248
-
1249
- if options[:persistence] and not Persistence::TSV === file.data
1250
- persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
1251
- Log.low "Making persistance #{ persistence_file }"
1252
- @data = TCHash.get(persistence_file)
1253
- @data.merge! file
1254
- @data.key_field = file.key_field
1255
- @data.fields = file.fields
1256
- else
1257
- @data = file.data
1258
- end
1259
-
1260
- @key_field = file.key_field
1261
- @fields = file.fields
1262
- @case_insensitive = file.case_insensitive
1263
- @list = file.list
1264
- return self
1265
- when Hash === file
1266
- Log.low "Encapsulating Hash in TSV object"
1267
- @filename = "Hash:" + Digest::MD5.hexdigest(file.inspect)
1268
- if options[:persistence]
1269
- persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
1270
- Log.low "Making persistance #{ persistence_file }"
1271
- @data = TCHash.get(persistence_file)
1272
- @data.merge! file
1273
110
  else
1274
- @data = file
1275
- end
1276
- return self
1277
- when Persistence::TSV === file
1278
- Log.low "Encapsulating Persistence::TSV"
1279
- @filename = "Persistence::TSV:" + Digest::MD5.hexdigest(file.inspect)
1280
- @data = file
1281
- @key_field = file.key_field
1282
- @fields = file.fields
1283
- return self
1284
- when File === file
1285
- @filename = File.expand_path file.path
1286
- when String === file && File.exists?(file)
1287
- @filename = File.expand_path file
1288
- file = Open.open(file)
1289
- when StringIO
1290
- else
1291
- raise "File #{file} not found"
1292
- end
111
+ @data, extra = Persistence.persist(@filename, :TSV, :tsv_extra, options) do |file, options, filename|
112
+ data, extra = nil
1293
113
 
1294
- if options[:persistence]
1295
- options.delete :persistence
1296
- persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
114
+ case
115
+ ## Parse source
116
+ when (String === file and file.respond_to? :open)
117
+ data, extra = TSV.parse(file.open(:grep => options[:grep]) , options)
118
+ extra[:namespace] ||= file.namespace
119
+ extra[:datadir] ||= file.datadir
120
+ when StringIO === file
121
+ data, extra = TSV.parse(file, options)
122
+ when Open.can_open?(file)
123
+ Open.open(file, :grep => options[:grep]) do |f|
124
+ data, extra = TSV.parse(f, options)
125
+ end
126
+ #extra[:namespace] = File.basename(File.dirname(filename))
127
+ #extra.delete :namespace if extra[:namespace].empty? or extra[:namespace] == "."
128
+ when File === file
129
+ file = Open.grep(file, options[:grep]) if options[:grep]
130
+ data, extra = TSV.parse(file, options)
131
+ extra[:namespace] = File.basename(File.dirname(file.filename))
132
+ extra.delete :namespace if extra[:namespace].empty? or extra[:namespace] == "."
133
+ ## Encapsulate Hash or TSV
134
+ when block_given?
135
+ data
136
+ else
137
+ raise "Unknown input in TSV.new #{file.inspect}"
138
+ end
1297
139
 
1298
- if File.exists? persistence_file
1299
- Log.low "Loading Persistence for #{ @filename } in #{persistence_file}"
1300
- @data = Persistence::TSV.get(persistence_file, false)
1301
- @key_field = @data.key_field
1302
- @fields = @data.fields
1303
- else
1304
- @data = Persistence::TSV.get(persistence_file, true)
1305
- file = Open.grep(file, options[:grep]) if options[:grep]
140
+ extra[:filename] = filename
1306
141
 
1307
- Log.low "Persistent Parsing for #{ @filename } in #{persistence_file}"
1308
- @key_field, @fields = TSV.parse(@data, file, options.merge(:persistence_file => persistence_file))
1309
- @data.key_field = @key_field
1310
- @data.fields = @fields
1311
- @data.read
142
+ [data, extra]
143
+ end
1312
144
  end
1313
- else
1314
- Log.low "Non-persistent parsing for #{ @filename }"
1315
- @data = {}
1316
- file = Open.grep(file, options[:grep]) if options[:grep]
1317
- @key_field, @fields = TSV.parse(@data, file, options)
1318
145
  end
1319
146
 
1320
- file.close
1321
- @case_insensitive = options[:case_insensitive] == true
147
+ if not extra.nil?
148
+ %w(case_insensitive namespace datadir fields key_field type filename cast).each do |key|
149
+ if extra.include? key.to_sym
150
+ self.send("#{key}=".to_sym, extra[key.to_sym])
151
+ if @data.respond_to? "#{key}=".to_sym
152
+ @data.send("#{key}=".to_sym, extra[key.to_sym])
153
+ end
154
+ end
155
+ end
156
+ end
1322
157
  end
1323
158
 
1324
- end
1325
-
1326
- #{{{ CacheHelper
1327
- require 'rbbt/util/cachehelper'
1328
- module CacheHelper
1329
- def self.tsv_cache(name, key = [])
1330
- cache_file = CacheHelper.build_filename name, key
159
+ def write
160
+ @data.write if @data.respond_to? :write
161
+ end
1331
162
 
1332
- if File.exists? cache_file
1333
- Log.debug "TSV cache file '#{cache_file}' found"
1334
- hash = TCHash.get(cache_file)
1335
- TSV.new(hash)
1336
- else
1337
- Log.debug "Producing TSV cache file '#{cache_file}'"
1338
- data = yield
1339
- TSV.new(data, :persistence_file => cache_file)
1340
- end
163
+ def read
164
+ @data.read if @data.respond_to? :read
1341
165
  end
166
+
1342
167
  end