rbbt-util 1.2.1 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt-util.rb +2 -1
- data/lib/rbbt/util/R.rb +18 -1
- data/lib/rbbt/util/cmd.rb +7 -6
- data/lib/rbbt/util/data_module.rb +31 -11
- data/lib/rbbt/util/fix_width_table.rb +209 -0
- data/lib/rbbt/util/log.rb +12 -2
- data/lib/rbbt/util/misc.rb +91 -12
- data/lib/rbbt/util/open.rb +18 -9
- data/lib/rbbt/util/path.rb +152 -0
- data/lib/rbbt/util/persistence.rb +282 -75
- data/lib/rbbt/util/pkg_data.rb +16 -59
- data/lib/rbbt/util/pkg_software.rb +15 -1
- data/lib/rbbt/util/rake.rb +5 -1
- data/lib/rbbt/util/tc_hash.rb +129 -59
- data/lib/rbbt/util/tsv.rb +109 -1284
- data/lib/rbbt/util/tsv/accessor.rb +273 -0
- data/lib/rbbt/util/tsv/attach.rb +228 -0
- data/lib/rbbt/util/tsv/index.rb +303 -0
- data/lib/rbbt/util/tsv/manipulate.rb +271 -0
- data/lib/rbbt/util/tsv/parse.rb +258 -0
- data/share/lib/R/util.R +5 -3
- data/test/rbbt/util/test_R.rb +9 -1
- data/test/rbbt/util/test_data_module.rb +5 -0
- data/test/rbbt/util/test_fix_width_table.rb +107 -0
- data/test/rbbt/util/test_misc.rb +43 -0
- data/test/rbbt/util/test_open.rb +0 -1
- data/test/rbbt/util/test_path.rb +10 -0
- data/test/rbbt/util/test_persistence.rb +63 -2
- data/test/rbbt/util/test_pkg_data.rb +29 -8
- data/test/rbbt/util/test_tc_hash.rb +52 -0
- data/test/rbbt/util/test_tsv.rb +55 -678
- data/test/rbbt/util/tsv/test_accessor.rb +109 -0
- data/test/rbbt/util/tsv/test_attach.rb +271 -0
- data/test/rbbt/util/tsv/test_index.rb +158 -0
- data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
- data/test/rbbt/util/tsv/test_parse.rb +72 -0
- data/test/test_helper.rb +1 -0
- metadata +25 -4
data/lib/rbbt/util/pkg_data.rb
CHANGED
@@ -1,58 +1,13 @@
|
|
1
1
|
require 'rbbt/util/open'
|
2
2
|
require 'rbbt/util/tsv'
|
3
3
|
require 'rbbt/util/log'
|
4
|
+
require 'rbbt/util/path'
|
4
5
|
require 'rbbt/util/rake'
|
5
6
|
|
6
7
|
module PKGData
|
7
8
|
attr_accessor :claims
|
8
|
-
def self.extended(
|
9
|
-
|
10
|
-
end
|
11
|
-
|
12
|
-
module Path
|
13
|
-
attr_accessor :base
|
14
|
-
|
15
|
-
def method_missing(name, *args, &block)
|
16
|
-
new = File.join(self.dup, name.to_s)
|
17
|
-
new.extend Path
|
18
|
-
new.base = base
|
19
|
-
new
|
20
|
-
end
|
21
|
-
|
22
|
-
def [](name)
|
23
|
-
new = File.join(self.dup, name.to_s)
|
24
|
-
new.extend Path
|
25
|
-
new.base = base
|
26
|
-
new
|
27
|
-
end
|
28
|
-
|
29
|
-
def tsv(options = {})
|
30
|
-
produce
|
31
|
-
TSV.new self, options
|
32
|
-
end
|
33
|
-
|
34
|
-
def index(field = nil, other = nil, options = {})
|
35
|
-
produce
|
36
|
-
TSV.index self, options.merge(:target => field, :others => other)
|
37
|
-
end
|
38
|
-
|
39
|
-
def open
|
40
|
-
produce
|
41
|
-
Open.open(self)
|
42
|
-
end
|
43
|
-
|
44
|
-
def read
|
45
|
-
produce
|
46
|
-
Open.read(self)
|
47
|
-
end
|
48
|
-
|
49
|
-
def produce
|
50
|
-
return if File.exists? self
|
51
|
-
|
52
|
-
Log.debug("Trying to produce '#{ self }'")
|
53
|
-
file, producer = base.reclaim self
|
54
|
-
base.produce(self, producer[:get], producer[:subdir], producer[:sharedir])
|
55
|
-
end
|
9
|
+
def self.extended(pkg_module)
|
10
|
+
pkg_module.claims = {}
|
56
11
|
end
|
57
12
|
|
58
13
|
class SharedirNotFoundError < StandardError; end
|
@@ -84,19 +39,18 @@ module PKGData
|
|
84
39
|
|
85
40
|
def files
|
86
41
|
path = datadir.dup.extend Path
|
87
|
-
path.
|
42
|
+
path.pkg_module = self
|
43
|
+
path.datadir = datadir
|
88
44
|
path
|
89
45
|
end
|
90
46
|
|
91
47
|
def in_datadir?(file)
|
92
|
-
|
93
|
-
true
|
94
|
-
else
|
95
|
-
false
|
96
|
-
end
|
48
|
+
Misc.in_directory? file, datadir
|
97
49
|
end
|
98
50
|
|
99
|
-
|
51
|
+
# file is the complete path of the file inside the datadir
|
52
|
+
# get is the get method. :Rakefile for
|
53
|
+
def claim(file, get = nil, subdir = nil, namespace = nil, sharedir = nil)
|
100
54
|
file = case
|
101
55
|
when (file.nil? or file === :all)
|
102
56
|
File.join(datadir, subdir.to_s)
|
@@ -107,7 +61,7 @@ module PKGData
|
|
107
61
|
end
|
108
62
|
|
109
63
|
sharedir ||= PKGData.get_caller_sharedir
|
110
|
-
claims[file] = {:get => get, :subdir => subdir, :sharedir => sharedir}
|
64
|
+
claims[file] = {:get => get, :subdir => subdir, :sharedir => sharedir, :namespace => namespace}
|
111
65
|
produce(file, get, subdir, sharedir) if TSV === get
|
112
66
|
produce(file, get, subdir, sharedir) if String === get and not File.exists?(get) and reclaim(file).nil? and not File.basename(get.to_s) == "Rakefile"
|
113
67
|
end
|
@@ -139,9 +93,12 @@ module PKGData
|
|
139
93
|
|
140
94
|
FileUtils.mkdir_p File.dirname(file) unless File.exists?(File.dirname(file))
|
141
95
|
|
96
|
+
relative_path = Misc.path_relative_to file, datadir
|
142
97
|
case
|
143
98
|
when get.nil?
|
144
|
-
FileUtils.cp File.join(sharedir,
|
99
|
+
FileUtils.cp File.join(sharedir, relative_path), file.to_s
|
100
|
+
when StringIO === get
|
101
|
+
Open.write(file, get.read)
|
145
102
|
when Proc === get
|
146
103
|
Open.write(file, get.call)
|
147
104
|
when TSV === get
|
@@ -153,10 +110,10 @@ module PKGData
|
|
153
110
|
rakefile = File.join(sharedir, get.to_s)
|
154
111
|
end
|
155
112
|
produce_with_rake(rakefile, subdir, file)
|
156
|
-
when String === get
|
113
|
+
when (String === get and Open.remote? get)
|
157
114
|
Open.write(file, Open.read(get, :wget_options => {:pipe => true}, :nocache => true))
|
158
115
|
else
|
159
|
-
raise "Unknown Get: #{get.class}"
|
116
|
+
raise "Unknown Get: #{get.class} #{get}"
|
160
117
|
end
|
161
118
|
end
|
162
119
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'rbbt/util/open'
|
2
|
+
require 'rbbt/util/misc'
|
2
3
|
require 'rbbt/util/tsv'
|
3
4
|
require 'rbbt/util/log'
|
4
5
|
require 'rbbt/util/cmd'
|
@@ -52,7 +53,20 @@ module PKGSoftware
|
|
52
53
|
|
53
54
|
FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
|
54
55
|
|
55
|
-
|
56
|
+
case
|
57
|
+
when get == :directory
|
58
|
+
FileUtils.mkdir_p File.dirname(path) unless File.exists? File.dirname(path)
|
59
|
+
subdir = Misc.path_relative_to File.dirname(path), opt_dir
|
60
|
+
source = File.join(sharedir, 'install/software', subdir, pkg)
|
61
|
+
|
62
|
+
FileUtils.cp_r File.join(sharedir, 'install/software', subdir, pkg), path
|
63
|
+
when get == :binary
|
64
|
+
FileUtils.mkdir_p File.dirname(path) unless File.exists? File.dirname(path)
|
65
|
+
subdir = Misc.path_relative_to File.dirname(path), opt_dir
|
66
|
+
source = File.join(sharedir, 'install/software', subdir, pkg)
|
67
|
+
|
68
|
+
FileUtils.cp File.join(sharedir, 'install/software', subdir, pkg), path
|
69
|
+
when (get.nil? or get.empty?)
|
56
70
|
CMD.cmd("#{File.join(sharedir, 'install', 'software', pkg)} #{File.join(Rbbt.rootdir, 'share/install/software/lib', 'install_helpers')} #{software_dir}", :stderr => Log::HIGH)
|
57
71
|
else
|
58
72
|
CMD.cmd("#{File.join(sharedir, 'install', 'software', get)} #{File.join(Rbbt.rootdir, 'share/install/software/lib', 'install_helpers')} #{software_dir}")
|
data/lib/rbbt/util/rake.rb
CHANGED
data/lib/rbbt/util/tc_hash.rb
CHANGED
@@ -1,39 +1,143 @@
|
|
1
|
+
require 'rbbt/util/misc'
|
1
2
|
require 'tokyocabinet'
|
2
3
|
|
3
4
|
class TCHash < TokyoCabinet::HDB
|
4
5
|
class OpenError < StandardError;end
|
5
6
|
class KeyFormatError < StandardError;end
|
6
7
|
|
7
|
-
|
8
|
+
class StringSerializer
|
9
|
+
def self.dump(str); str.to_s; end
|
10
|
+
def self.load(str); str; end
|
11
|
+
end
|
12
|
+
|
13
|
+
class StringArraySerializer
|
14
|
+
def self.dump(array)
|
15
|
+
array.collect{|a| a.to_s} * "\t"
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.load(string)
|
19
|
+
string.split(/\t/)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
class StringDoubleArraySerializer
|
24
|
+
def self.dump(array)
|
25
|
+
array.collect{|a| a.collect{|a| a.to_s} * "|"} * "\t"
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.load(string)
|
29
|
+
string.split(/\t/).collect{|l| l.split("|")}
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
ALIAS = {:marshal => Marshal, nil => Marshal, :single => StringSerializer, :list => StringArraySerializer, :double => StringDoubleArraySerializer}
|
35
|
+
|
36
|
+
CONNECTIONS = {}
|
8
37
|
|
9
38
|
FIELD_INFO_ENTRIES = {
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
39
|
+
:type => '__tokyocabinet_hash_type',
|
40
|
+
:serializer => '__tokyocabinet_hash_serializer',
|
41
|
+
:identifiers => '__tokyocabinet_hash_identifiers',
|
42
|
+
:datadir => '__tokyocabinet_hash_datadir',
|
43
|
+
:fields => '__tokyocabinet_hash_fields',
|
44
|
+
:key_field => '__tokyocabinet_hash_key_field',
|
45
|
+
:filename => '__tokyocabinet_hash_filename',
|
46
|
+
:namespace => '__tokyocabinet_hash_namspace',
|
47
|
+
:type => '__tokyocabinet_hash_type',
|
48
|
+
:case_insensitive => '__tokyocabinet_hash_case_insensitive'
|
15
49
|
}
|
16
|
-
CONNECTIONS = {}
|
17
50
|
|
18
51
|
FIELD_INFO_ENTRIES.each do |entry, key|
|
19
52
|
class_eval do
|
20
|
-
define_method entry.to_s, proc{self
|
21
|
-
define_method entry.to_s + "=", proc{|value| write unless write?; self
|
53
|
+
define_method entry.to_s, proc{v = self.original_get_brackets(key); v.nil? ? nil : Marshal.load(v)}
|
54
|
+
define_method entry.to_s + "=", proc{|value| write unless write?; self.original_set_brackets key, Marshal.dump(value)}
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def serializer
|
59
|
+
@serializer
|
60
|
+
end
|
61
|
+
|
62
|
+
def serializer=(value)
|
63
|
+
self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer],value) unless value.nil?
|
64
|
+
end
|
65
|
+
|
66
|
+
alias original_open open
|
67
|
+
def open(write = false)
|
68
|
+
flags = write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER
|
69
|
+
if !self.original_open(@path_to_db, flags)
|
70
|
+
ecode = self.ecode
|
71
|
+
raise OpenError, "Open error: #{self.errmsg(ecode)}. Trying to open file #{@path_to_db}"
|
72
|
+
end
|
73
|
+
|
74
|
+
@write = write
|
75
|
+
|
76
|
+
if write
|
77
|
+
self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], @serializer.to_s) unless @serializer.nil?
|
78
|
+
else
|
79
|
+
serializer_str = self.original_get_brackets(FIELD_INFO_ENTRIES[:serializer])
|
80
|
+
|
81
|
+
if serializer_str.nil? or serializer_str.empty?
|
82
|
+
@serializer = Marshal
|
83
|
+
else
|
84
|
+
mod = Misc.string2const serializer_str
|
85
|
+
@serializer = mod
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def write?
|
91
|
+
@write
|
92
|
+
end
|
93
|
+
|
94
|
+
def write
|
95
|
+
self.close
|
96
|
+
self.open(true)
|
97
|
+
end
|
98
|
+
|
99
|
+
def read
|
100
|
+
self.close
|
101
|
+
self.open(false)
|
102
|
+
end
|
103
|
+
|
104
|
+
def initialize(path, write = false, serializer = Marshal)
|
105
|
+
super()
|
106
|
+
|
107
|
+
serializer = ALIAS[serializer] if ALIAS.include? serializer
|
108
|
+
|
109
|
+
@path_to_db = path
|
110
|
+
@serializer = serializer
|
111
|
+
|
112
|
+
if write || ! File.exists?(@path_to_db)
|
113
|
+
self.open(true)
|
114
|
+
else
|
115
|
+
self.open(false)
|
22
116
|
end
|
23
117
|
end
|
24
118
|
|
119
|
+
def self.get(path, write = false, serializer = Marshal)
|
120
|
+
serializer = ALIAS[serializer] if ALIAS.include? serializer
|
121
|
+
@serializer = serializer
|
122
|
+
d = CONNECTIONS[path] ||= self.new(path, false, @serializer)
|
123
|
+
write ? d.write : d.read
|
124
|
+
d
|
125
|
+
end
|
126
|
+
|
127
|
+
#{{{ ACESSORS
|
128
|
+
|
25
129
|
alias original_get_brackets []
|
26
130
|
def [](key)
|
27
131
|
return nil unless String === key
|
28
132
|
result = self.original_get_brackets(key)
|
29
|
-
result ?
|
133
|
+
result ? @serializer.load(result) : nil
|
30
134
|
end
|
31
135
|
|
32
136
|
alias original_set_brackets []=
|
33
137
|
def []=(key,value)
|
34
138
|
raise KeyFormatError, "Key must be a String, its #{key.class.to_s}" unless String === key
|
35
|
-
|
36
|
-
self.original_set_brackets(key,
|
139
|
+
raise "Closed TCHash connection" unless write?
|
140
|
+
self.original_set_brackets(key, serializer.dump(value))
|
37
141
|
end
|
38
142
|
|
39
143
|
def values_at(*args)
|
@@ -57,22 +161,21 @@ class TCHash < TokyoCabinet::HDB
|
|
57
161
|
indexes = FIELD_INFO_ENTRIES.values.collect do |field| keys.index(field) end.compact.sort.reverse
|
58
162
|
indexes.each do |index| values.delete_at index end
|
59
163
|
|
60
|
-
values.collect{|v|
|
164
|
+
values.collect{|v| serializer.load(v)}
|
61
165
|
end
|
62
166
|
|
63
167
|
# This version of each fixes a problem in ruby 1.9. It also
|
64
168
|
# removes the special entries
|
65
|
-
def
|
66
|
-
values = self.original_values
|
169
|
+
def each(&block)
|
170
|
+
values = self.original_values
|
67
171
|
keys = self.original_keys
|
68
172
|
indexes = FIELD_INFO_ENTRIES.values.collect do |field| keys.index(field) end.compact.sort.reverse
|
69
173
|
indexes.sort.reverse.each do |index| values.delete_at(index); keys.delete_at(index) end
|
70
174
|
|
71
|
-
keys.zip(values).each &block
|
175
|
+
keys.zip(values.collect{|v| serializer.load v}).each &block
|
72
176
|
end
|
73
177
|
|
74
178
|
alias original_each each
|
75
|
-
alias each each19
|
76
179
|
|
77
180
|
def collect
|
78
181
|
res = []
|
@@ -81,50 +184,17 @@ class TCHash < TokyoCabinet::HDB
|
|
81
184
|
end
|
82
185
|
|
83
186
|
def merge!(data)
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
flags = write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER
|
93
|
-
if !self.original_open(@path_to_db, flags)
|
94
|
-
ecode = self.ecode
|
95
|
-
raise OpenError, "Open error: #{self.errmsg(ecode)}. Trying to open file #{@path_to_db}"
|
96
|
-
end
|
97
|
-
@write = write
|
98
|
-
end
|
99
|
-
|
100
|
-
def write?
|
101
|
-
@write
|
102
|
-
end
|
103
|
-
|
104
|
-
def write
|
105
|
-
self.close
|
106
|
-
self.open(true)
|
107
|
-
end
|
108
|
-
|
109
|
-
def read
|
110
|
-
self.close
|
111
|
-
self.open(false)
|
112
|
-
end
|
113
|
-
|
114
|
-
def initialize(path, write = false)
|
115
|
-
super()
|
116
|
-
@path_to_db = path
|
117
|
-
|
118
|
-
if write || ! File.exists?(@path_to_db)
|
119
|
-
self.open(true)
|
187
|
+
raise "Closed TCHash connection" unless write?
|
188
|
+
serialized =
|
189
|
+
data.collect{|key, values| [key.to_s, serializer.dump(values)]}
|
190
|
+
if tranbegin
|
191
|
+
serialized.each do |key, values|
|
192
|
+
self.putasync(key, values)
|
193
|
+
end
|
194
|
+
trancommit
|
120
195
|
else
|
121
|
-
|
196
|
+
raise "Transaction cannot initiate"
|
122
197
|
end
|
123
198
|
end
|
124
199
|
|
125
|
-
def self.get(path, write = false)
|
126
|
-
d = CONNECTIONS[path] ||= self.new(path, false)
|
127
|
-
write ? d.write : d.read
|
128
|
-
d
|
129
|
-
end
|
130
200
|
end
|
data/lib/rbbt/util/tsv.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'rbbt/util/misc'
|
2
2
|
require 'rbbt/util/open'
|
3
|
+
require 'rbbt/util/path'
|
3
4
|
require 'rbbt/util/tc_hash'
|
4
5
|
require 'rbbt/util/tmpfile'
|
5
6
|
require 'rbbt/util/log'
|
@@ -7,1336 +8,160 @@ require 'rbbt/util/persistence'
|
|
7
8
|
require 'digest'
|
8
9
|
require 'fileutils'
|
9
10
|
|
11
|
+
require 'rbbt/util/tsv/parse'
|
12
|
+
require 'rbbt/util/tsv/accessor'
|
13
|
+
require 'rbbt/util/tsv/manipulate'
|
14
|
+
require 'rbbt/util/tsv/index'
|
15
|
+
require 'rbbt/util/tsv/attach'
|
10
16
|
class TSV
|
11
|
-
class FieldNotFoundError < StandardError;end
|
12
|
-
|
13
|
-
module Field
|
14
|
-
def ==(string)
|
15
|
-
return false unless String === string
|
16
|
-
self.sub(/#.*/,'').casecmp(string.sub(/#.*/,'')) == 0
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
#{{{ Persistence
|
21
|
-
|
22
|
-
CACHEDIR="/tmp/tsv_persistent_cache"
|
23
|
-
FileUtils.mkdir CACHEDIR unless File.exist? CACHEDIR
|
24
|
-
|
25
|
-
def self.cachedir=(cachedir)
|
26
|
-
CACHEDIR.replace cachedir
|
27
|
-
FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.cachedir
|
31
|
-
CACHEDIR
|
32
|
-
end
|
33
|
-
|
34
|
-
|
35
|
-
#{{{ Headers and Field Stuff
|
36
17
|
|
37
18
|
def self.headers(file, options = {})
|
38
|
-
|
39
|
-
|
19
|
+
|
20
|
+
## Remove options from filename
|
21
|
+
if String === file and file =~/(.*?)#(.*)/ and File.exists? $1
|
22
|
+
options = Misc.add_defaults options, Misc.string2hash($2)
|
40
23
|
file = $1
|
41
24
|
end
|
42
25
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
26
|
+
fields = case
|
27
|
+
when Open.can_open?(file)
|
28
|
+
Open.open(file, :grep => options[:grep]) do |f| TSV.parse_header(f, options[:sep], options[:header_hash]).values_at(0, 1).flatten end
|
29
|
+
when File === file
|
30
|
+
file = Open.grep(file, options[:grep]) if options[:grep]
|
31
|
+
TSV.parse_header(file, options[:sep], options[:header_hash]).values_at(0, 1).flatten
|
32
|
+
else
|
33
|
+
raise "File #{file.inspect} not found"
|
34
|
+
end
|
47
35
|
|
48
|
-
if
|
49
|
-
line.chomp.sub(/^#{options[:header_hash]}/,'').split(options[:sep])
|
50
|
-
else
|
36
|
+
if fields.compact.empty?
|
51
37
|
nil
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
def self.fields_include(key_field, fields, field)
|
56
|
-
return true if key_field == field or fields.include? field
|
57
|
-
return false
|
58
|
-
end
|
59
|
-
|
60
|
-
def self.field_positions(key_field, fields, *selected)
|
61
|
-
selected.collect do |sel|
|
62
|
-
case
|
63
|
-
when (sel.nil? or sel == :main or sel == key_field)
|
64
|
-
-1
|
65
|
-
when Integer === sel
|
66
|
-
sel
|
67
|
-
else
|
68
|
-
Misc.field_position fields, sel
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
def fields_include(field)
|
74
|
-
return TSV.fields_include key_field, fields, field
|
75
|
-
end
|
76
|
-
|
77
|
-
def field_positions(*selected)
|
78
|
-
return nil if selected.nil? or selected == [nil]
|
79
|
-
TSV.field_positions(key_field, fields, *selected)
|
80
|
-
end
|
81
|
-
|
82
|
-
def fields_at(*positions)
|
83
|
-
return nil if fields.nil?
|
84
|
-
return nil if positions.nil? or positions == [nil]
|
85
|
-
(fields + [key_field]).values_at(*positions)
|
86
|
-
end
|
87
|
-
|
88
|
-
#{{{ Iteration, Merging, etc
|
89
|
-
def through(new_key_field = nil, new_fields = nil, &block)
|
90
|
-
new_key_position = (field_positions(new_key_field) || [-1]).first
|
91
|
-
new_fields = [new_fields] if String === new_fields
|
92
|
-
|
93
|
-
if new_key_position == -1
|
94
|
-
|
95
|
-
if new_fields.nil? or new_fields == fields
|
96
|
-
each &block
|
97
|
-
return [key_field, fields]
|
98
|
-
else
|
99
|
-
new_field_positions = field_positions(*new_fields)
|
100
|
-
each do |key, values|
|
101
|
-
if values.nil?
|
102
|
-
yield key, nil
|
103
|
-
else
|
104
|
-
yield key, values.values_at(*new_field_positions)
|
105
|
-
end
|
106
|
-
end
|
107
|
-
return [key_field, fields_at(*new_field_positions)]
|
108
|
-
end
|
109
|
-
|
110
|
-
else
|
111
|
-
new_field_positions = field_positions(*new_fields)
|
112
|
-
|
113
|
-
new_field_names = fields_at(*new_field_positions)
|
114
|
-
if new_field_names.nil? and fields
|
115
|
-
new_field_names = fields.dup
|
116
|
-
new_field_names.delete_at new_key_position
|
117
|
-
new_field_names.unshift key_field
|
118
|
-
end
|
119
|
-
|
120
|
-
each do |key, values|
|
121
|
-
if type == :double
|
122
|
-
tmp_values = values + [[key]]
|
123
|
-
else
|
124
|
-
tmp_values = values + [key]
|
125
|
-
end
|
126
|
-
|
127
|
-
if new_field_positions.nil?
|
128
|
-
new_values = values.dup
|
129
|
-
new_values.delete_at new_key_position
|
130
|
-
new_values.unshift [key]
|
131
|
-
else
|
132
|
-
new_values = tmp_values.values_at(*new_field_positions)
|
133
|
-
end
|
134
|
-
|
135
|
-
if not Array === tmp_values[new_key_position]
|
136
|
-
yield tmp_values[new_key_position], NamedArray.name(new_values, new_field_names)
|
137
|
-
else
|
138
|
-
tmp_values[new_key_position].each do |new_key|
|
139
|
-
if new_field_names
|
140
|
-
yield new_key, NamedArray.name(new_values, new_field_names)
|
141
|
-
else
|
142
|
-
yield new_key, new_values
|
143
|
-
end
|
144
|
-
end
|
145
|
-
end
|
146
|
-
end
|
147
|
-
return [(fields_at(new_key_position) || [nil]).first, new_field_names]
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
|
-
def process(field)
|
152
|
-
through do |key, values|
|
153
|
-
values[field].replace yield(values[field], key, values) unless values[field].nil?
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
|
158
|
-
def reorder(new_key_field, new_fields = nil, options = {})
|
159
|
-
options = Misc.add_defaults options
|
160
|
-
return TSV.new(Persistence::TSV.get(options[:persistence_file], false), :case_insensitive => case_insensitive) if options[:persistence_file] and File.exists?(options[:persistence_file])
|
161
|
-
|
162
|
-
new = {}
|
163
|
-
new_key_field, new_fields = through new_key_field, new_fields do |key, values|
|
164
|
-
if new[key].nil?
|
165
|
-
new[key] = values
|
166
|
-
else
|
167
|
-
new[key] = new[key].zip(values)
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
new.each do |key,values|
|
172
|
-
values.each{|list| list.flatten! if Array === list}
|
173
|
-
end
|
174
|
-
|
175
|
-
if options[:persistence_file]
|
176
|
-
reordered = TSV.new(Persistence::TSV.get(options[:persistence_file], false), :case_insensitive => case_insensitive)
|
177
|
-
reordered.merge! new
|
178
38
|
else
|
179
|
-
|
39
|
+
fields
|
180
40
|
end
|
181
|
-
|
182
|
-
reordered.key_field = new_key_field
|
183
|
-
reordered.fields = new_fields
|
184
|
-
|
185
|
-
reordered
|
186
41
|
end
|
187
42
|
|
188
|
-
def
|
189
|
-
reorder(:main, new_fields)
|
43
|
+
def self.encapsulate_persistence(file, options)
|
190
44
|
end
|
191
45
|
|
192
|
-
def
|
193
|
-
|
194
|
-
self[key] = values + [yield(key, values)]
|
195
|
-
end
|
196
|
-
|
197
|
-
if fields != nil
|
198
|
-
new_fields = fields + [name]
|
199
|
-
self.fields = new_fields
|
200
|
-
end
|
201
|
-
end
|
202
|
-
|
203
|
-
def select(method)
|
204
|
-
new = TSV.new({})
|
205
|
-
new.key_field = key_field
|
206
|
-
new.fields = fields.dup
|
207
|
-
new.type = type
|
208
|
-
new.filename = filename + "#Select: #{method.inspect}"
|
209
|
-
new.case_insensitive = case_insensitive
|
46
|
+
def initialize(file = {}, type = nil, options = {})
|
47
|
+
# Process Options
|
210
48
|
|
211
|
-
case
|
212
|
-
when Array === method
|
213
|
-
through do |key, values|
|
214
|
-
new[key] = values if ([key,values].flatten & method).any?
|
215
|
-
end
|
216
|
-
when Regexp === method
|
217
|
-
through do |key, values|
|
218
|
-
new[key] = values if [key,values].flatten.select{|v| v =~ method}.any?
|
219
|
-
end
|
220
|
-
when String === method
|
221
|
-
through do |key, values|
|
222
|
-
new[key] = values if [key,values].flatten.select{|v| v == method}.any?
|
223
|
-
end
|
224
|
-
when Hash === method
|
225
|
-
key = method.keys.first
|
226
|
-
method = method.values.first
|
227
|
-
case
|
228
|
-
when (Array === method and (:main == key or key_field == key))
|
229
|
-
method.each{|item| if values = self[item]; then new[item] = values; end}
|
230
|
-
when Array === method
|
231
|
-
through :main, key do |key, values|
|
232
|
-
new[key] = self[key] if (values.flatten & method).any?
|
233
|
-
end
|
234
|
-
when Regexp === method
|
235
|
-
through :main, key do |key, values|
|
236
|
-
new[key] = self[key] if values.flatten.select{|v| v =~ method}.any?
|
237
|
-
end
|
238
|
-
when String === method
|
239
|
-
through :main, key do |key, values|
|
240
|
-
new[key] = self[key] if values.flatten.select{|v| v == method}.any?
|
241
|
-
end
|
242
|
-
end
|
243
|
-
end
|
244
|
-
|
245
|
-
|
246
|
-
new
|
247
|
-
end
|
248
|
-
|
249
|
-
def index(options = {})
|
250
|
-
options = Misc.add_defaults options, :order => false, :persistence => false
|
251
|
-
|
252
|
-
new, extra = Persistence.persist(filename, :Index, :tsv, options) do |filename, options|
|
253
|
-
new = {}
|
254
|
-
if options[:order]
|
255
|
-
new_key_field, new_fields = through options[:target], options[:others] do |key, values|
|
256
|
-
|
257
|
-
values.each_with_index do |list, i|
|
258
|
-
next if list.nil? or list.empty?
|
259
|
-
|
260
|
-
list = [list] unless Array === list
|
261
|
-
|
262
|
-
list.each do |value|
|
263
|
-
next if value.nil? or value.empty?
|
264
|
-
value = value.downcase if options[:case_insensitive]
|
265
|
-
new[value] ||= []
|
266
|
-
new[value][i + 1] ||= []
|
267
|
-
new[value][i + 1] << key
|
268
|
-
end
|
269
|
-
new[key] ||= []
|
270
|
-
new[key][0] = key
|
271
|
-
end
|
272
|
-
|
273
|
-
end
|
274
|
-
|
275
|
-
new.each do |key, values|
|
276
|
-
values.flatten!
|
277
|
-
values.compact!
|
278
|
-
end
|
279
|
-
|
280
|
-
else
|
281
|
-
new_key_field, new_fields = through options[:target], options[:others] do |key, values|
|
282
|
-
new[key] ||= []
|
283
|
-
new[key] << key
|
284
|
-
values.each do |list|
|
285
|
-
next if list.nil?
|
286
|
-
if Array === list
|
287
|
-
list.each do |value|
|
288
|
-
value = value.downcase if options[:case_insensitive]
|
289
|
-
new[value] ||= []
|
290
|
-
new[value] << key
|
291
|
-
end
|
292
|
-
else
|
293
|
-
next if list.empty?
|
294
|
-
value = list
|
295
|
-
value = value.downcase if options[:case_insensitive]
|
296
|
-
new[value] ||= []
|
297
|
-
new[value] << key
|
298
|
-
end
|
299
|
-
end
|
300
|
-
end
|
301
|
-
end
|
302
|
-
|
303
|
-
[new, {:key_field => new_key_field, :fields => new_fields, :type => :double, :case_insensitive => options[:case_insensitive]}]
|
304
|
-
end
|
305
|
-
|
306
|
-
new = TSV.new(new)
|
307
|
-
new.filename = "Index: " + filename + options.inspect
|
308
|
-
new.fields = extra[:fields]
|
309
|
-
new.key_field = extra[:key_field]
|
310
|
-
new.case_insensitive = extra[:case_insensitive]
|
311
|
-
new.type = extra[:type]
|
312
|
-
new
|
313
|
-
end
|
314
|
-
|
315
|
-
def smart_merge(other, match = nil, new_fields = nil)
|
316
|
-
|
317
|
-
new_fields = [new_fields] if String === new_fields
|
318
|
-
if self.fields and other.fields
|
319
|
-
common_fields = ([self.key_field] + self.fields) & ([other.key_field] + other.fields)
|
320
|
-
new_fields ||= ([other.key_field] + other.fields) - ([self.key_field] + self.fields)
|
321
|
-
|
322
|
-
common_fields.delete match if String === match
|
323
|
-
common_fields.delete_at match if Integer === match
|
324
|
-
|
325
|
-
this_common_field_positions = self.field_positions *common_fields
|
326
|
-
other_common_field_positions = other.field_positions *common_fields
|
327
|
-
other_new_field_positions = other.field_positions *new_fields
|
328
|
-
else
|
329
|
-
nofieldinfo = true
|
330
|
-
end
|
331
|
-
|
332
|
-
case
|
333
|
-
when TSV === match
|
334
|
-
match_index = match
|
335
|
-
matching_code_position = nil
|
336
|
-
|
337
|
-
when Array === match
|
338
|
-
match_index = match.first
|
339
|
-
matching_code_position = field_positions(match.last).first
|
340
|
-
|
341
|
-
when match =~ /^through:(.*)/
|
342
|
-
through = $1
|
343
|
-
if through =~ /(.*)#using:(.*)/
|
344
|
-
through = $1
|
345
|
-
matching_code_position = field_positions($2).first
|
346
|
-
else
|
347
|
-
matching_code_position = nil
|
348
|
-
end
|
349
|
-
index_fields = TSV.headers(through)
|
350
|
-
target_field = index_fields.select{|field| other.fields_include field}.first
|
351
|
-
Log.debug "Target Field: #{ target_field }"
|
352
|
-
match_index = TSV.open_file(through).index(:field => target_field)
|
353
|
-
|
354
|
-
when field_positions(match).first
|
355
|
-
matching_code_position = field_positions(match).first
|
356
|
-
match_index = nil
|
357
|
-
end
|
358
|
-
|
359
|
-
if matching_code_position.nil? and match_index.fields
|
360
|
-
match_index.fields.each do |field|
|
361
|
-
if matching_code_position = field_positions(field).first
|
362
|
-
break
|
363
|
-
end
|
364
|
-
end
|
365
|
-
end
|
366
|
-
|
367
|
-
if match_index and match_index.key_field == other.key_field
|
368
|
-
other_index = nil
|
369
|
-
else
|
370
|
-
other_index = (match === String and other.fields_include(match)) ?
|
371
|
-
other.index(:other => match, :order => true) : other.index(:order => true)
|
372
|
-
end
|
373
|
-
|
374
|
-
each do |key,values|
|
375
|
-
Log.debug "Key: #{ key }. Values: #{values * ", "}"
|
376
|
-
if matching_code_position.nil? or matching_code_position == -1
|
377
|
-
matching_codes = [key]
|
378
|
-
else
|
379
|
-
matching_codes = values[matching_code_position]
|
380
|
-
matching_codes = [matching_codes] unless matching_codes.nil? or Array === matching_codes
|
381
|
-
end
|
382
|
-
Log.debug "Matching codes: #{matching_codes}"
|
383
|
-
|
384
|
-
next if matching_codes.nil?
|
385
|
-
|
386
|
-
matching_codes.each do |matching_code|
|
387
|
-
if match_index
|
388
|
-
if match_index[matching_code]
|
389
|
-
matching_code_fix = match_index[matching_code].first
|
390
|
-
else
|
391
|
-
matching_code_fix = nil
|
392
|
-
end
|
393
|
-
else
|
394
|
-
matching_code_fix = matching_code
|
395
|
-
end
|
396
|
-
|
397
|
-
Log.debug "Matching code (fix): #{matching_code_fix}"
|
398
|
-
next if matching_code_fix.nil?
|
399
|
-
|
400
|
-
if other_index
|
401
|
-
Log.debug "Using other_index"
|
402
|
-
other_codes = other_index[matching_code_fix]
|
403
|
-
else
|
404
|
-
other_codes = matching_code_fix
|
405
|
-
end
|
406
|
-
Log.debug "Other codes: #{other_codes}"
|
407
|
-
|
408
|
-
next if other_codes.nil? or other_codes.empty?
|
409
|
-
other_code = other_codes.first
|
410
|
-
|
411
|
-
if nofieldinfo
|
412
|
-
next if other[other_code].nil?
|
413
|
-
if type == :double
|
414
|
-
other_values = [[other_code]] + other[other_code]
|
415
|
-
else
|
416
|
-
other_values = [other_code] + other[other_code]
|
417
|
-
end
|
418
|
-
other_values.delete_if do |list|
|
419
|
-
list = [list] unless Array === list
|
420
|
-
list.collect{|e| case_insensitive ? e.downcase : e }.
|
421
|
-
select{|e| case_insensitive ? e == matching_code.downcase : e == matching_code }.any?
|
422
|
-
end
|
423
|
-
|
424
|
-
new_values = values + other_values
|
425
|
-
else
|
426
|
-
if other[other_code].nil?
|
427
|
-
if type == :double
|
428
|
-
other_values = [[]] * other.fields.length
|
429
|
-
else
|
430
|
-
other_values = [] * other.fields.length
|
431
|
-
end
|
432
|
-
else
|
433
|
-
if type == :double
|
434
|
-
other_values = other[other_code] + [[other_code]]
|
435
|
-
else
|
436
|
-
other_values = other[other_code] + [other_code]
|
437
|
-
end
|
438
|
-
end
|
439
|
-
|
440
|
-
|
441
|
-
new_values = values.dup
|
442
|
-
|
443
|
-
if type == :double
|
444
|
-
this_common_field_positions.zip(other_common_field_positions).each do |tpos, opos|
|
445
|
-
new_values_tops = new_values[tpos]
|
446
|
-
|
447
|
-
if other.type == :double
|
448
|
-
new_values_tops += other_values[opos]
|
449
|
-
else
|
450
|
-
new_values_tops += [other_values[opos]]
|
451
|
-
end
|
452
|
-
|
453
|
-
new_values[tpos] = new_values_tops.uniq
|
454
|
-
end
|
455
|
-
end
|
456
|
-
|
457
|
-
new_values.concat other_values.values_at *other_new_field_positions
|
458
|
-
end
|
459
|
-
|
460
|
-
self[key] = new_values
|
461
|
-
end
|
462
|
-
end
|
463
|
-
|
464
|
-
self.fields = self.fields + new_fields unless nofieldinfo
|
465
|
-
end
|
466
|
-
|
467
|
-
|
468
|
-
def self.field_matches(tsv, values)
|
469
|
-
if values.flatten.sort[0..9].compact.collect{|n| n.to_i} == (1..10).to_a
|
470
|
-
return {}
|
471
|
-
end
|
472
|
-
|
473
|
-
key_field = tsv.key_field
|
474
|
-
fields = tsv.fields
|
475
|
-
|
476
|
-
field_values = {}
|
477
|
-
fields.each{|field|
|
478
|
-
field_values[field] = []
|
479
|
-
}
|
480
|
-
|
481
|
-
tsv.through do |key,entry_values|
|
482
|
-
fields.zip(entry_values).each do |field,entry_field_values|
|
483
|
-
field_values[field].concat entry_field_values
|
484
|
-
end
|
485
|
-
end
|
486
|
-
|
487
|
-
field_values.each do |field,field_value_list|
|
488
|
-
field_value_list.replace(values & field_value_list.flatten.uniq)
|
489
|
-
end
|
490
|
-
|
491
|
-
field_values[key_field] = values & tsv.keys
|
492
|
-
|
493
|
-
field_values
|
494
|
-
end
|
495
|
-
|
496
|
-
def field_matches(values)
|
497
|
-
TSV.field_matches(self, values)
|
498
|
-
end
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
#{{{ Helpers
|
503
|
-
|
504
|
-
def self.index(file, options = {})
|
505
|
-
options = Misc.add_defaults options, :data_persistence => true, :persistence => true
|
506
|
-
persistence, persistence_file = Misc.process_options options, :persistence, :persistence_file
|
507
|
-
options[:persistence], options[:persistence_file] = options.values_at :data_persistence, :data_persistence_file
|
508
|
-
options.delete :data_persistence
|
509
|
-
options.delete :data_persistence_file
|
510
|
-
|
511
|
-
index, extra = Persistence.persist(file, :Index, :tsv, options) do |file, options, filename|
|
512
|
-
TSV.new(file, :double, options).index
|
513
|
-
end
|
514
|
-
index
|
515
|
-
end
|
516
|
-
|
517
|
-
def self.index2(file, options = {})
|
518
|
-
opt_data = options.dup
|
519
|
-
opt_index = options.dup
|
520
|
-
opt_data.delete :field
|
521
|
-
opt_data.delete :persistence
|
522
|
-
opt_index.delete :persistence
|
523
|
-
|
524
|
-
opt_data[:persistence] = true if options[:data_persistence]
|
525
|
-
|
526
|
-
opt_index.merge! :persistence_file => get_persistence_file(file, "index:#{ file }_#{options[:field]}:", opt_index) if options[:persistence]
|
527
|
-
|
528
|
-
if ! opt_index[:persistence_file].nil? && File.exists?(opt_index[:persistence_file])
|
529
|
-
Log.low "Reloading persistent index for #{ file }: #{opt_index[:persistence_file]}"
|
530
|
-
TSV.new(Persistence::TSV.get(opt_index[:persistence_file], false), opt_index)
|
531
|
-
else
|
532
|
-
Log.low "Creating index for #{ file }: #{opt_index[:persistence_file]}"
|
533
|
-
data = TSV.new(file, opt_data)
|
534
|
-
data.index(opt_index)
|
535
|
-
end
|
536
|
-
end
|
537
|
-
|
538
|
-
def self.open_file(file)
|
539
|
-
if file =~ /(.*?)#(.*)/
|
540
|
-
file, options = $1, Misc.string2hash($2.to_s)
|
541
|
-
else
|
542
|
-
options = {}
|
543
|
-
end
|
544
|
-
|
545
|
-
TSV.new(file, options)
|
546
|
-
end
|
547
|
-
|
548
|
-
#{{{ Accesor Methods
|
549
|
-
attr_accessor :filename, :type, :case_insensitive, :key_field, :fields, :data
|
550
|
-
|
551
|
-
def fields
|
552
|
-
return nil if @fields.nil?
|
553
|
-
fields = @fields
|
554
|
-
fields.each do |f| f.extend Field end if Array === fields
|
555
|
-
fields
|
556
|
-
end
|
557
|
-
|
558
|
-
def fields=(new_fields)
|
559
|
-
@fields = new_fields
|
560
|
-
if Persistence::TSV === @data
|
561
|
-
@data.fields = new_fields
|
562
|
-
end
|
563
|
-
end
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
def keys
|
568
|
-
@data.keys
|
569
|
-
end
|
570
|
-
|
571
|
-
def values
|
572
|
-
@data.values
|
573
|
-
end
|
574
|
-
|
575
|
-
def size
|
576
|
-
@data.size
|
577
|
-
end
|
578
|
-
|
579
|
-
# Write
|
580
|
-
|
581
|
-
def []=(key, value)
|
582
|
-
key = key.downcase if @case_insensitive
|
583
|
-
@data[key] = value
|
584
|
-
end
|
585
|
-
|
586
|
-
|
587
|
-
def merge!(new_data)
|
588
|
-
new_data.each do |key, value|
|
589
|
-
self[key] = value
|
590
|
-
end
|
591
|
-
end
|
592
|
-
|
593
|
-
# Read
|
594
|
-
|
595
|
-
def follow(value)
|
596
|
-
return nil if value.nil?
|
597
|
-
if String === value && value =~ /__Ref:(.*)/
|
598
|
-
return self[$1]
|
599
|
-
else
|
600
|
-
value = NamedArray.name value, fields if Array === value and fields
|
601
|
-
value
|
602
|
-
end
|
603
|
-
end
|
604
|
-
|
605
|
-
def [](key)
|
606
|
-
if Array === key
|
607
|
-
return @data[key] if @data[key] != nil
|
608
|
-
key.each{|k| v = self[k]; return v unless v.nil?}
|
609
|
-
return nil
|
610
|
-
end
|
611
|
-
|
612
|
-
key = key.downcase if @case_insensitive and key !~ /^__Ref:/
|
613
|
-
follow @data[key]
|
614
|
-
end
|
615
|
-
|
616
|
-
def values_at(*keys)
|
617
|
-
keys.collect{|k|
|
618
|
-
self[k]
|
619
|
-
}
|
620
|
-
end
|
621
|
-
|
622
|
-
def each(&block)
|
623
|
-
@data.each do |key, value|
|
624
|
-
block.call(key, follow(value))
|
625
|
-
end
|
626
|
-
end
|
627
|
-
|
628
|
-
def collect
|
629
|
-
if block_given?
|
630
|
-
@data.collect do |key, value|
|
631
|
-
value = follow(value)
|
632
|
-
key, values = yield key, value
|
633
|
-
end
|
634
|
-
else
|
635
|
-
@data.collect do |key, value|
|
636
|
-
[key, follow(value)]
|
637
|
-
end
|
638
|
-
end
|
639
|
-
end
|
640
|
-
|
641
|
-
def sort(&block)
|
642
|
-
collect.sort(&block).collect{|p|
|
643
|
-
key, value = p
|
644
|
-
value = NamedArray.name value, fields if fields
|
645
|
-
[key, value]
|
646
|
-
}
|
647
|
-
end
|
648
|
-
|
649
|
-
def sort_by(&block)
|
650
|
-
collect.sort_by &block
|
651
|
-
end
|
652
|
-
|
653
|
-
def values_to_s(values)
|
654
|
-
case
|
655
|
-
when (values.nil? and fields.nil?)
|
656
|
-
"\n"
|
657
|
-
when (values.nil? and not fields.nil?)
|
658
|
-
"\t" << ([""] * fields.length) * "\t" << "\n"
|
659
|
-
when (not Array === values)
|
660
|
-
"\t" << values.to_s << "\n"
|
661
|
-
when Array === values.first
|
662
|
-
"\t" << values.collect{|list| (list || []) * "|"} * "\t" << "\n"
|
663
|
-
else
|
664
|
-
"\t" << values * "\t" << "\n"
|
665
|
-
end
|
666
|
-
end
|
667
|
-
|
668
|
-
def to_s(keys = nil)
|
669
|
-
str = ""
|
670
|
-
|
671
|
-
if fields
|
672
|
-
str << "#" << key_field << "\t" << fields * "\t" << "\n"
|
673
|
-
end
|
674
|
-
|
675
|
-
if keys.nil?
|
676
|
-
each do |key, values|
|
677
|
-
key = key.to_s if Symbol === key
|
678
|
-
str << key.dup << values_to_s(values)
|
679
|
-
end
|
680
|
-
else
|
681
|
-
keys.zip(values_at(*keys)).each do |key, values|
|
682
|
-
key = key.to_s if Symbol === key
|
683
|
-
str << key.dup << values_to_s(values)
|
684
|
-
end
|
685
|
-
end
|
686
|
-
|
687
|
-
str
|
688
|
-
end
|
689
|
-
|
690
|
-
#{{{ Parsing
|
691
|
-
|
692
|
-
def self.parse_fields(io, delimiter = "\t")
|
693
|
-
return [] if io.nil?
|
694
|
-
fields = io.split(delimiter, -1)
|
695
|
-
fields
|
696
|
-
end
|
697
|
-
|
698
|
-
def self.zip_fields(list, fields = nil)
|
699
|
-
return [] if list.nil? || list.empty?
|
700
|
-
fields ||= list.fields if list.respond_to? :fields
|
701
|
-
zipped = list[0].zip(*list[1..-1])
|
702
|
-
zipped = zipped.collect{|v| NamedArray.name(v, fields)} if fields
|
703
|
-
zipped
|
704
|
-
end
|
705
|
-
|
706
|
-
def self.key_order(file, options = {})
|
707
|
-
# Prepare options
|
708
|
-
options = add_defaults options,
|
709
|
-
:sep => "\t",
|
710
|
-
:sep2 => "|",
|
711
|
-
:native => 0,
|
712
|
-
:fix => nil,
|
713
|
-
:exclude => nil,
|
714
|
-
:select => nil,
|
715
|
-
:grep => nil,
|
716
|
-
:case_insensitive => false,
|
717
|
-
:header_hash => '#'
|
718
|
-
|
719
|
-
options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
|
720
|
-
|
721
|
-
if String === file and File.exists? file
|
722
|
-
file = File.open(file)
|
723
|
-
end
|
724
|
-
|
725
|
-
#{{{ Process first line
|
726
|
-
|
727
|
-
line = file.gets
|
728
|
-
raise "Empty content" if line.nil?
|
729
|
-
line.chomp!
|
730
|
-
|
731
|
-
if line =~ /^#{options[:header_hash]}/
|
732
|
-
header_fields = parse_fields(line, options[:sep])
|
733
|
-
header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
|
734
|
-
line = file.gets
|
735
|
-
else
|
736
|
-
header_fields = nil
|
737
|
-
end
|
738
|
-
|
739
|
-
id_pos = Misc.field_position(header_fields, options[:native])
|
740
|
-
|
741
|
-
if options[:extra].nil?
|
742
|
-
extra_pos = nil
|
743
|
-
max_cols = 0
|
744
|
-
else
|
745
|
-
extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
|
746
|
-
end
|
747
|
-
|
748
|
-
ids = []
|
749
|
-
#{{{ Process rest
|
750
|
-
while line do
|
751
|
-
line.chomp!
|
752
|
-
|
753
|
-
line = options[:fix].call line if options[:fix]
|
754
|
-
break if not line
|
755
|
-
|
756
|
-
# Select and fix lines
|
757
|
-
if line.empty? or
|
758
|
-
(options[:exclude] and options[:exclude].call(line)) or
|
759
|
-
(options[:select] and not options[:select].call(line))
|
760
|
-
|
761
|
-
line = file.gets
|
762
|
-
next
|
763
|
-
end
|
764
|
-
|
765
|
-
### Process line
|
766
|
-
|
767
|
-
# Chunk fields
|
768
|
-
parts = parse_fields(line, options[:sep])
|
769
|
-
|
770
|
-
# Get next line
|
771
|
-
line = file.gets
|
772
|
-
|
773
|
-
# Get id field
|
774
|
-
next if parts[id_pos].nil? || parts[id_pos].empty?
|
775
|
-
ids << parts[id_pos]
|
776
|
-
end
|
777
|
-
|
778
|
-
ids
|
779
|
-
end
|
780
|
-
|
781
|
-
def self.parse_header(stream, sep, header_hash)
|
782
|
-
fields, key_field = nil
|
783
|
-
options = {}
|
784
|
-
|
785
|
-
line = stream.gets
|
786
|
-
|
787
|
-
if line and line =~ /^#{header_hash}: (.*)/
|
788
|
-
options = Misc.string2hash $1
|
789
|
-
line = stream.gets
|
790
|
-
end
|
791
|
-
|
792
|
-
sep = options[:sep] if options[:sep]
|
793
|
-
|
794
|
-
if line and line =~ /^#{header_hash}/
|
795
|
-
line.chomp!
|
796
|
-
fields = parse_fields(line, sep)
|
797
|
-
key_field = fields.shift
|
798
|
-
key_field = key_field[(0 + header_hash.length)..-1] # Remove initial hash character
|
799
|
-
line = stream.gets
|
800
|
-
end
|
801
|
-
|
802
|
-
raise "Empty content" if line.nil?
|
803
|
-
return key_field, fields, options, line
|
804
|
-
end
|
805
|
-
|
806
|
-
def self.parse(stream, options = {})
|
807
|
-
# Prepare options
|
808
|
-
options = Misc.add_defaults options,
|
809
|
-
:case_insensitive => false,
|
810
|
-
:type => :double,
|
811
|
-
|
812
|
-
:merge => false,
|
813
|
-
:keep_empty => true,
|
814
|
-
:cast => nil,
|
815
|
-
|
816
|
-
:sep => "\t",
|
817
|
-
:sep2 => "|",
|
818
|
-
:header_hash => '#',
|
819
|
-
|
820
|
-
:key => 0,
|
821
|
-
:fields => nil,
|
822
|
-
|
823
|
-
:fix => nil,
|
824
|
-
:exclude => nil,
|
825
|
-
:select => nil,
|
826
|
-
:grep => nil
|
827
|
-
|
828
|
-
|
829
|
-
sep, header_hash =
|
830
|
-
Misc.process_options options, :sep, :header_hash
|
831
|
-
|
832
|
-
key_field, other_fields, more_options, line = TSV.parse_header(stream, sep, header_hash)
|
833
|
-
|
834
|
-
sep = more_options[:sep] if more_options[:sep]
|
835
|
-
options = Misc.add_defaults options, more_options
|
836
|
-
sep2 = Misc.process_options options, :sep2
|
837
|
-
|
838
|
-
key, others =
|
839
|
-
Misc.process_options options, :key, :others
|
840
|
-
|
841
|
-
if key_field.nil?
|
842
|
-
key_pos = key
|
843
|
-
key_field, fields = nil
|
844
|
-
else
|
845
|
-
all_fields = [key_field].concat other_fields
|
846
|
-
|
847
|
-
key_pos = Misc.field_position(all_fields, key)
|
848
|
-
|
849
|
-
if String === others or Symbol === others
|
850
|
-
others = [others]
|
851
|
-
end
|
852
|
-
|
853
|
-
if others.nil?
|
854
|
-
other_pos = (0..(all_fields.length - 1)).to_a
|
855
|
-
other_pos.delete key_pos
|
856
|
-
else
|
857
|
-
other_pos = Misc.field_position(all_fields, *others)
|
858
|
-
end
|
859
|
-
|
860
|
-
key_field = all_fields[key_pos]
|
861
|
-
fields = all_fields.values_at *other_pos
|
862
|
-
end
|
863
|
-
|
864
|
-
case_insensitive, type, merge, keep_empty, cast =
|
865
|
-
Misc.process_options options, :case_insensitive, :type, :merge, :keep_empty, :cast
|
866
|
-
fix, exclude, select, grep =
|
867
|
-
Misc.process_options options, :fix, :exclude, :select, :grep
|
868
|
-
|
869
|
-
#{{{ Process rest
|
870
|
-
data = {}
|
871
|
-
single = type.to_sym != :double
|
872
|
-
max_cols = 0
|
873
|
-
while line do
|
874
|
-
line.chomp!
|
875
|
-
|
876
|
-
line = fix.call line if fix
|
877
|
-
break if not line
|
878
|
-
|
879
|
-
if header_hash and line =~ /^#{header_hash}/
|
880
|
-
line = stream.gets
|
881
|
-
next
|
882
|
-
end
|
883
|
-
|
884
|
-
if line.empty? or
|
885
|
-
(exclude and exclude.call(line)) or
|
886
|
-
(select and not select.call(line))
|
887
|
-
|
888
|
-
line = stream.gets
|
889
|
-
next
|
890
|
-
end
|
891
|
-
|
892
|
-
# Chunk fields
|
893
|
-
parts = parse_fields(line, sep)
|
894
|
-
|
895
|
-
# Get next line
|
896
|
-
line = stream.gets
|
897
|
-
|
898
|
-
# Get id field
|
899
|
-
next if parts[key_pos].nil? || parts[key_pos].empty?
|
900
|
-
|
901
|
-
if single
|
902
|
-
ids = parse_fields(parts[key_pos], sep2)
|
903
|
-
ids.collect!{|id| id.downcase} if case_insensitive
|
904
|
-
|
905
|
-
id = ids.shift
|
906
|
-
ids.each do |id2| data[id2] = "__Ref:#{id}" end
|
907
|
-
|
908
|
-
if key_field.nil?
|
909
|
-
other_pos = (0..(parts.length - 1)).to_a
|
910
|
-
other_pos.delete key_pos
|
911
|
-
end
|
912
|
-
|
913
|
-
extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2).first}
|
914
|
-
extra.collect! do |elem|
|
915
|
-
case
|
916
|
-
when String === cast
|
917
|
-
elem.send(cast)
|
918
|
-
when Proc === cast
|
919
|
-
cast.call elem
|
920
|
-
end
|
921
|
-
end if cast
|
922
|
-
|
923
|
-
max_cols = extra.size if extra.size > (max_cols || 0)
|
924
|
-
case type
|
925
|
-
when :list
|
926
|
-
data[id] = extra unless data.include? id
|
927
|
-
when :flat
|
928
|
-
data[id] = extra.flatten unless data.include? id
|
929
|
-
when :single
|
930
|
-
data[id] = extra.flatten.first unless data.include? id
|
931
|
-
end
|
932
|
-
|
933
|
-
else
|
934
|
-
ids = parse_fields(parts[key_pos], sep2)
|
935
|
-
ids.collect!{|id| id.downcase} if case_insensitive
|
936
|
-
|
937
|
-
id = ids.shift
|
938
|
-
ids.each do |id2| data[id2] = "__Ref:#{id}" end
|
939
|
-
|
940
|
-
if key_field.nil?
|
941
|
-
other_pos = (0..(parts.length - 1)).to_a
|
942
|
-
other_pos.delete key_pos
|
943
|
-
end
|
944
|
-
|
945
|
-
extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}
|
946
|
-
extra.collect! do |list|
|
947
|
-
case
|
948
|
-
when String === cast
|
949
|
-
list.collect{|elem| elem.send(cast)}
|
950
|
-
when Proc === cast
|
951
|
-
list.collect{|elem| cast.call elem}
|
952
|
-
end
|
953
|
-
end if cast
|
954
|
-
|
955
|
-
max_cols = extra.size if extra.size > (max_cols || 0)
|
956
|
-
if merge
|
957
|
-
data[id] = extra unless data.include? id
|
958
|
-
else
|
959
|
-
if not data.include? id
|
960
|
-
data[id] = extra
|
961
|
-
else
|
962
|
-
entry = data[id]
|
963
|
-
while entry =~ /__Ref:(.*)/ do entry = data[$1] end
|
964
|
-
extra.each_with_index do |f, i|
|
965
|
-
if f.empty?
|
966
|
-
next unless keep_empty
|
967
|
-
f= [""]
|
968
|
-
end
|
969
|
-
entry[i] ||= []
|
970
|
-
entry[i] = entry[i].concat f
|
971
|
-
end
|
972
|
-
data[id] = entry
|
973
|
-
end
|
974
|
-
end
|
975
|
-
end
|
976
|
-
end
|
977
|
-
|
978
|
-
if keep_empty and max_cols > 0
|
979
|
-
data.each do |key, values|
|
980
|
-
next if values =~ /__Ref:/
|
981
|
-
new_values = values
|
982
|
-
max_cols.times do |i|
|
983
|
-
if type == :double
|
984
|
-
new_values[i] = [""] if new_values[i].nil? or new_values[i].empty?
|
985
|
-
else
|
986
|
-
new_values[i] = "" if new_values[i].nil?
|
987
|
-
end
|
988
|
-
end
|
989
|
-
data[key] = new_values
|
990
|
-
end
|
991
|
-
end
|
992
|
-
|
993
|
-
[data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive}]
|
994
|
-
end
|
995
|
-
|
996
|
-
def self.parse2(data, file, options = {})
|
997
|
-
|
998
|
-
# Prepare options
|
999
|
-
options = Misc.add_defaults options,
|
1000
|
-
:sep => "\t",
|
1001
|
-
:sep2 => "|",
|
1002
|
-
:native => 0,
|
1003
|
-
:extra => nil,
|
1004
|
-
:fix => nil,
|
1005
|
-
:exclude => nil,
|
1006
|
-
:select => nil,
|
1007
|
-
:grep => nil,
|
1008
|
-
:single => false,
|
1009
|
-
:unique => false,
|
1010
|
-
:merge => false,
|
1011
|
-
:flatten => false,
|
1012
|
-
:keep_empty => true,
|
1013
|
-
:case_insensitive => false,
|
1014
|
-
:header_hash => '#' ,
|
1015
|
-
:cast => nil,
|
1016
|
-
:persistence_file => nil
|
1017
|
-
|
1018
|
-
|
1019
|
-
options[:unique] = options[:uniq] if options[:unique].nil?
|
1020
|
-
options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
|
1021
|
-
options[:flatten] = true if options[:single]
|
1022
|
-
|
1023
|
-
#{{{ Process first line
|
1024
|
-
|
1025
|
-
line = file.gets
|
1026
|
-
raise "Empty content" if line.nil?
|
1027
|
-
line.chomp!
|
1028
|
-
|
1029
|
-
if line =~ /^#{options[:header_hash]}/
|
1030
|
-
header_fields = parse_fields(line, options[:sep])
|
1031
|
-
header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
|
1032
|
-
line = file.gets
|
1033
|
-
else
|
1034
|
-
header_fields = nil
|
1035
|
-
end
|
1036
|
-
|
1037
|
-
id_pos = Misc.field_position(header_fields, options[:native])
|
1038
|
-
|
1039
|
-
if options[:extra].nil?
|
1040
|
-
extra_pos = nil
|
1041
|
-
max_cols = 0
|
1042
|
-
else
|
1043
|
-
extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
|
1044
|
-
end
|
1045
|
-
|
1046
|
-
#{{{ Process rest
|
1047
|
-
while line do
|
1048
|
-
line.chomp!
|
1049
|
-
|
1050
|
-
line = options[:fix].call line if options[:fix]
|
1051
|
-
break if not line
|
1052
|
-
|
1053
|
-
if options[:header_hash] && line =~ /^#{options[:header_hash]}/
|
1054
|
-
line = file.gets
|
1055
|
-
next
|
1056
|
-
end
|
1057
|
-
|
1058
|
-
# Select and fix lines
|
1059
|
-
if line.empty? or
|
1060
|
-
(options[:exclude] and options[:exclude].call(line)) or
|
1061
|
-
(options[:select] and not options[:select].call(line))
|
1062
|
-
|
1063
|
-
line = file.gets
|
1064
|
-
next
|
1065
|
-
end
|
1066
|
-
|
1067
|
-
### Process line
|
1068
|
-
|
1069
|
-
# Chunk fields
|
1070
|
-
parts = parse_fields(line, options[:sep])
|
1071
|
-
|
1072
|
-
# Get next line
|
1073
|
-
line = file.gets
|
1074
|
-
|
1075
|
-
# Get id field
|
1076
|
-
next if parts[id_pos].nil? || parts[id_pos].empty?
|
1077
|
-
ids = parse_fields(parts[id_pos], options[:sep2])
|
1078
|
-
ids.collect!{|id| id.downcase } if options[:case_insensitive]
|
1079
|
-
|
1080
|
-
# Get extra fields
|
1081
|
-
|
1082
|
-
if options[:extra].nil? and not (options[:flatten] or options[:single])
|
1083
|
-
extra = parts
|
1084
|
-
extra.delete_at(id_pos)
|
1085
|
-
max_cols = extra.size if extra.size > (max_cols || 0)
|
1086
|
-
else
|
1087
|
-
if extra_pos.nil?
|
1088
|
-
extra = parts
|
1089
|
-
extra.delete_at id_pos
|
1090
|
-
else
|
1091
|
-
extra = parts.values_at(*extra_pos)
|
1092
|
-
end
|
1093
|
-
end
|
1094
|
-
|
1095
|
-
extra.collect!{|value| parse_fields(value, options[:sep2])}
|
1096
|
-
extra.collect!{|values| values.first} if options[:unique]
|
1097
|
-
extra.flatten! if options[:flatten]
|
1098
|
-
extra = extra.first if options[:single]
|
1099
|
-
|
1100
|
-
if options[:cast]
|
1101
|
-
if Array === extra[0]
|
1102
|
-
e = extra
|
1103
|
-
else
|
1104
|
-
e = [extra]
|
1105
|
-
end
|
1106
|
-
|
1107
|
-
e.each do |list|
|
1108
|
-
case
|
1109
|
-
when String === options[:cast]
|
1110
|
-
list.collect!{|elem| elem.send(options[:cast])}
|
1111
|
-
when Proc === options[:cast]
|
1112
|
-
list.collect!{|elem| options[:cast].call elem}
|
1113
|
-
end
|
1114
|
-
end
|
1115
|
-
end
|
1116
|
-
|
1117
|
-
main_entry = ids.shift
|
1118
|
-
ids.each do |id| data[id] = "__Ref:#{main_entry}" end
|
1119
|
-
|
1120
|
-
case
|
1121
|
-
when (options[:single] or options[:unique] or not options[:merge])
|
1122
|
-
data[main_entry] = extra unless data.include? main_entry
|
1123
|
-
when options[:flatten]
|
1124
|
-
entry = data[main_entry]
|
1125
|
-
|
1126
|
-
if entry.nil?
|
1127
|
-
data[main_entry] = extra
|
1128
|
-
else
|
1129
|
-
while entry =~ /__Ref:(.*)/ do entry = data[$1] end
|
1130
|
-
if Persistence::TSV === data
|
1131
|
-
data[main_entry] = entry.concat extra
|
1132
|
-
else
|
1133
|
-
data[main_entry].concat extra
|
1134
|
-
end
|
1135
|
-
end
|
1136
|
-
else
|
1137
|
-
entry = data[main_entry]
|
1138
|
-
if entry.nil?
|
1139
|
-
data[main_entry] = extra
|
1140
|
-
else
|
1141
|
-
while entry =~ /__Ref:(.*)/ do entry = data[$1] end
|
1142
|
-
extra.each_with_index do |fields, i|
|
1143
|
-
if fields.empty?
|
1144
|
-
next unless options[:keep_empty]
|
1145
|
-
fields = [""]
|
1146
|
-
end
|
1147
|
-
entry[i] ||= []
|
1148
|
-
entry[i] = entry[i].concat fields
|
1149
|
-
end
|
1150
|
-
data[main_entry] = entry
|
1151
|
-
end
|
1152
|
-
end
|
1153
|
-
end
|
1154
|
-
|
1155
|
-
if options[:keep_empty] and not max_cols.nil?
|
1156
|
-
data.each do |key,values|
|
1157
|
-
new_values = values
|
1158
|
-
max_cols.times do |i|
|
1159
|
-
new_values[i] ||= [""]
|
1160
|
-
end
|
1161
|
-
data[key] = new_values
|
1162
|
-
end
|
1163
|
-
end
|
1164
|
-
|
1165
|
-
# Save header information
|
1166
|
-
key_field = nil
|
1167
|
-
fields = nil
|
1168
|
-
if header_fields && header_fields.any?
|
1169
|
-
key_field = header_fields[id_pos]
|
1170
|
-
if extra_pos.nil?
|
1171
|
-
fields = header_fields
|
1172
|
-
fields.delete_at(id_pos)
|
1173
|
-
else
|
1174
|
-
fields = header_fields.values_at(*extra_pos)
|
1175
|
-
end
|
1176
|
-
end
|
1177
|
-
|
1178
|
-
data.read if Persistence::TSV === data
|
1179
|
-
|
1180
|
-
[key_field, fields]
|
1181
|
-
end
|
1182
|
-
def initialize(file = {}, type = :double, options = {})
|
1183
49
|
if Hash === type
|
1184
50
|
options = type
|
1185
|
-
type =
|
51
|
+
type = nil
|
1186
52
|
end
|
1187
53
|
|
54
|
+
## Remove options from filename
|
1188
55
|
if String === file and file =~/(.*?)#(.*)/ and File.exists? $1
|
1189
56
|
options = Misc.add_defaults options, Misc.string2hash($2)
|
1190
57
|
file = $1
|
1191
58
|
end
|
1192
59
|
|
1193
|
-
options = Misc.add_defaults options, :persistence => false, :
|
60
|
+
options = Misc.add_defaults options, :persistence => false, :type => type
|
61
|
+
|
62
|
+
# Extract Filename
|
63
|
+
|
64
|
+
file, extra = file if Array === file and file.length == 2 and Hash === file.last
|
1194
65
|
|
1195
66
|
@filename = Misc.process_options options, :filename
|
1196
67
|
@filename ||= case
|
68
|
+
when Path === file
|
69
|
+
file
|
1197
70
|
when (String === file and File.exists? file)
|
1198
71
|
File.expand_path file
|
72
|
+
when String === file
|
73
|
+
file
|
1199
74
|
when File === file
|
1200
75
|
File.expand_path file.path
|
76
|
+
when TSV === file
|
77
|
+
File.expand_path file.filename
|
78
|
+
when (Persistence::TSV === file and file.filename)
|
79
|
+
File.expand_path file.filename
|
1201
80
|
else
|
1202
|
-
|
81
|
+
file.class.to_s
|
1203
82
|
end
|
1204
83
|
|
1205
|
-
|
1206
|
-
|
84
|
+
# Process With Persistence
|
85
|
+
# Use filename to identify the persistence
|
86
|
+
# Several inputs supported
|
87
|
+
# Filename or File: Parsed
|
88
|
+
# Hash: Encapsulated, empty info
|
89
|
+
# TSV: Duplicate
|
90
|
+
case
|
91
|
+
when block_given?
|
92
|
+
@data, extra = Persistence.persist(@filename, :TSV, :tsv_extra, options.merge(:force_array => true)) do |filename, options| yield filename, options end
|
93
|
+
extra.each do |key, values|
|
94
|
+
self.send("#{ key }=".to_sym, values) if self.respond_to? "#{ key }=".to_sym
|
95
|
+
end if not extra.nil?
|
96
|
+
|
1207
97
|
else
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1211
|
-
|
1212
|
-
|
1213
|
-
|
98
|
+
case
|
99
|
+
when Hash === file
|
100
|
+
@data = file
|
101
|
+
when TSV === file
|
102
|
+
@data = file.data
|
103
|
+
when Persistence::TSV === file
|
104
|
+
@data = file
|
105
|
+
%w(case_insensitive namespace datadir fields key_field type filename cast).each do |key|
|
106
|
+
if @data.respond_to?(key.to_sym) and self.respond_to?("#{key}=".to_sym)
|
107
|
+
self.send "#{key}=".to_sym, @data.send(key.to_sym)
|
1214
108
|
end
|
1215
|
-
when File === file
|
1216
|
-
data, extra = TSV.parse(file, options)
|
1217
|
-
when Hash === file
|
1218
|
-
data = file
|
1219
|
-
extra = {:case_insensitive => options[:case_insensitive], :type => type}
|
1220
109
|
end
|
1221
|
-
|
1222
|
-
[data, extra]
|
1223
|
-
end
|
1224
|
-
end
|
1225
|
-
|
1226
|
-
@type = extra[:type]
|
1227
|
-
@key_field = extra[:key_field]
|
1228
|
-
@fields = extra[:fields]
|
1229
|
-
@case_insensitive = extra[:case_insensitive]
|
1230
|
-
end
|
1231
|
-
|
1232
|
-
def initialize2(file = {}, options = {})
|
1233
|
-
options = Misc.add_defaults options
|
1234
|
-
options[:persistence] = true if options[:persistence_file]
|
1235
|
-
|
1236
|
-
if String === file && file =~ /(.*?)#(.*)/
|
1237
|
-
file, file_options = $1, $2
|
1238
|
-
options = Misc.add_defaults file_options, options
|
1239
|
-
end
|
1240
|
-
|
1241
|
-
@case_insensitive = options[:case_insensitive] == true
|
1242
|
-
@list = ! (options[:flatten] == true || options[:single] == true || options[:unique] == true)
|
1243
|
-
|
1244
|
-
case
|
1245
|
-
when TSV === file
|
1246
|
-
Log.low "Copying TSV"
|
1247
|
-
@filename = file.filename
|
1248
|
-
|
1249
|
-
if options[:persistence] and not Persistence::TSV === file.data
|
1250
|
-
persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
|
1251
|
-
Log.low "Making persistance #{ persistence_file }"
|
1252
|
-
@data = TCHash.get(persistence_file)
|
1253
|
-
@data.merge! file
|
1254
|
-
@data.key_field = file.key_field
|
1255
|
-
@data.fields = file.fields
|
1256
|
-
else
|
1257
|
-
@data = file.data
|
1258
|
-
end
|
1259
|
-
|
1260
|
-
@key_field = file.key_field
|
1261
|
-
@fields = file.fields
|
1262
|
-
@case_insensitive = file.case_insensitive
|
1263
|
-
@list = file.list
|
1264
|
-
return self
|
1265
|
-
when Hash === file
|
1266
|
-
Log.low "Encapsulating Hash in TSV object"
|
1267
|
-
@filename = "Hash:" + Digest::MD5.hexdigest(file.inspect)
|
1268
|
-
if options[:persistence]
|
1269
|
-
persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
|
1270
|
-
Log.low "Making persistance #{ persistence_file }"
|
1271
|
-
@data = TCHash.get(persistence_file)
|
1272
|
-
@data.merge! file
|
1273
110
|
else
|
1274
|
-
@data = file
|
1275
|
-
|
1276
|
-
return self
|
1277
|
-
when Persistence::TSV === file
|
1278
|
-
Log.low "Encapsulating Persistence::TSV"
|
1279
|
-
@filename = "Persistence::TSV:" + Digest::MD5.hexdigest(file.inspect)
|
1280
|
-
@data = file
|
1281
|
-
@key_field = file.key_field
|
1282
|
-
@fields = file.fields
|
1283
|
-
return self
|
1284
|
-
when File === file
|
1285
|
-
@filename = File.expand_path file.path
|
1286
|
-
when String === file && File.exists?(file)
|
1287
|
-
@filename = File.expand_path file
|
1288
|
-
file = Open.open(file)
|
1289
|
-
when StringIO
|
1290
|
-
else
|
1291
|
-
raise "File #{file} not found"
|
1292
|
-
end
|
111
|
+
@data, extra = Persistence.persist(@filename, :TSV, :tsv_extra, options) do |file, options, filename|
|
112
|
+
data, extra = nil
|
1293
113
|
|
1294
|
-
|
1295
|
-
|
1296
|
-
|
114
|
+
case
|
115
|
+
## Parse source
|
116
|
+
when (String === file and file.respond_to? :open)
|
117
|
+
data, extra = TSV.parse(file.open(:grep => options[:grep]) , options)
|
118
|
+
extra[:namespace] ||= file.namespace
|
119
|
+
extra[:datadir] ||= file.datadir
|
120
|
+
when StringIO === file
|
121
|
+
data, extra = TSV.parse(file, options)
|
122
|
+
when Open.can_open?(file)
|
123
|
+
Open.open(file, :grep => options[:grep]) do |f|
|
124
|
+
data, extra = TSV.parse(f, options)
|
125
|
+
end
|
126
|
+
#extra[:namespace] = File.basename(File.dirname(filename))
|
127
|
+
#extra.delete :namespace if extra[:namespace].empty? or extra[:namespace] == "."
|
128
|
+
when File === file
|
129
|
+
file = Open.grep(file, options[:grep]) if options[:grep]
|
130
|
+
data, extra = TSV.parse(file, options)
|
131
|
+
extra[:namespace] = File.basename(File.dirname(file.filename))
|
132
|
+
extra.delete :namespace if extra[:namespace].empty? or extra[:namespace] == "."
|
133
|
+
## Encapsulate Hash or TSV
|
134
|
+
when block_given?
|
135
|
+
data
|
136
|
+
else
|
137
|
+
raise "Unknown input in TSV.new #{file.inspect}"
|
138
|
+
end
|
1297
139
|
|
1298
|
-
|
1299
|
-
Log.low "Loading Persistence for #{ @filename } in #{persistence_file}"
|
1300
|
-
@data = Persistence::TSV.get(persistence_file, false)
|
1301
|
-
@key_field = @data.key_field
|
1302
|
-
@fields = @data.fields
|
1303
|
-
else
|
1304
|
-
@data = Persistence::TSV.get(persistence_file, true)
|
1305
|
-
file = Open.grep(file, options[:grep]) if options[:grep]
|
140
|
+
extra[:filename] = filename
|
1306
141
|
|
1307
|
-
|
1308
|
-
|
1309
|
-
@data.key_field = @key_field
|
1310
|
-
@data.fields = @fields
|
1311
|
-
@data.read
|
142
|
+
[data, extra]
|
143
|
+
end
|
1312
144
|
end
|
1313
|
-
else
|
1314
|
-
Log.low "Non-persistent parsing for #{ @filename }"
|
1315
|
-
@data = {}
|
1316
|
-
file = Open.grep(file, options[:grep]) if options[:grep]
|
1317
|
-
@key_field, @fields = TSV.parse(@data, file, options)
|
1318
145
|
end
|
1319
146
|
|
1320
|
-
|
1321
|
-
|
147
|
+
if not extra.nil?
|
148
|
+
%w(case_insensitive namespace datadir fields key_field type filename cast).each do |key|
|
149
|
+
if extra.include? key.to_sym
|
150
|
+
self.send("#{key}=".to_sym, extra[key.to_sym])
|
151
|
+
if @data.respond_to? "#{key}=".to_sym
|
152
|
+
@data.send("#{key}=".to_sym, extra[key.to_sym])
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
1322
157
|
end
|
1323
158
|
|
1324
|
-
|
1325
|
-
|
1326
|
-
|
1327
|
-
require 'rbbt/util/cachehelper'
|
1328
|
-
module CacheHelper
|
1329
|
-
def self.tsv_cache(name, key = [])
|
1330
|
-
cache_file = CacheHelper.build_filename name, key
|
159
|
+
def write
|
160
|
+
@data.write if @data.respond_to? :write
|
161
|
+
end
|
1331
162
|
|
1332
|
-
|
1333
|
-
|
1334
|
-
hash = TCHash.get(cache_file)
|
1335
|
-
TSV.new(hash)
|
1336
|
-
else
|
1337
|
-
Log.debug "Producing TSV cache file '#{cache_file}'"
|
1338
|
-
data = yield
|
1339
|
-
TSV.new(data, :persistence_file => cache_file)
|
1340
|
-
end
|
163
|
+
def read
|
164
|
+
@data.read if @data.respond_to? :read
|
1341
165
|
end
|
166
|
+
|
1342
167
|
end
|