tb 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,93 @@
1
+ # lib/tb/qtsv.rb - quoted TSV related features for table library
2
+ #
3
+ # Copyright (C) 2010 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
class Tb

  # quoted TSV is a variant of TSV (tab separated value)
  #
  # All non-empty values are quoted by double-quotes.

  # Reads the file +filename+ and parses its content with Tb.parse_qtsv.
  # +header_fields+ and the block are passed through unchanged.
  def Tb.load_qtsv(filename, *header_fields, &block)
    Tb.parse_qtsv(File.read(filename), *header_fields, &block)
  end

  # Scans +qtsv+ and yields each row as an array of cells.
  #
  # +qtsv+ is a String, or any other object whose +read+ method returns the
  # text.  A double-quoted cell is yielded without its quotes; an unquoted
  # (empty) cell is yielded as nil.  Rows are terminated by "\n", "\r\n" or
  # the end of the input.
  #
  # Raises RuntimeError if the scanner could not consume the whole input.
  # Rows which were completed before the malformed part have already been
  # yielded at that point.
  #
  # Returns nil.
  def Tb.qtsv_stream_input(qtsv)
    qtsv = qtsv.read unless String === qtsv
    # Work on a private copy; presumably this keeps the final verification
    # stable even if the caller's block touches the original string.
    qtsv = qtsv.dup
    cells = []
    verify = ''.dup
    qtsv.scan(/\G("(.*?)"|)(\t|\n|\r\n|\z)/m) {
      verify << $&
      cell = $2
      sep = $3
      # An empty match at the very end of the input: nothing left to read.
      break if cell.nil? && sep.empty?
      cells << cell
      if sep != "\t"
        yield cells
        cells = []
      end
    }
    if verify != qtsv
      if qtsv.start_with?(verify)
        raise "unexpected scan ('verify' is a prefix of 'qtsv')"
      end
      if verify.length != qtsv.length
        raise "unexpected scan (length differ: orig:#{qtsv.length} verify:#{verify.length})"
      end
      raise "unexpected scan"
    end
    # Cells are only left over here when the input ended right after a tab,
    # i.e. the last row has an empty final cell and no line terminator.
    # Emit that row instead of silently dropping it.
    unless cells.empty?
      cells << nil
      yield cells
    end
    nil
  end

  # Parses the quoted TSV text +qtsv+ and returns a new Tb object.
  #
  # If a block is given, it receives the array of all parsed rows and its
  # result is used as the rows instead.
  #
  # If +header_fields+ is empty, leading rows whose cells are all nil or ''
  # are skipped and the next row is used as the header.  An input without
  # any such row results in an empty header.  If +header_fields+ is given,
  # every parsed row is treated as data.
  #
  # Raises ArgumentError if the header row read from the input contains the
  # same field twice.
  def Tb.parse_qtsv(qtsv, *header_fields)
    aa = []
    qtsv_stream_input(qtsv) {|ary|
      aa << ary
    }
    aa = yield aa if block_given?
    if header_fields.empty?
      # Guard against running off the end of the rows (empty or blank input).
      aa.shift while aa.first && aa.first.all? {|elt| elt.nil? || elt == '' }
      header_fields = aa.shift || []
      seen = {}
      header_fields.each_with_index {|f, i|
        if seen.include? f
          raise ArgumentError, "ambiguous header field: #{f.inspect} (#{seen[f]}th and #{i}th)"
        end
        seen[f] = i
      }
    end
    t = Tb.new(header_fields)
    aa.each {|ary|
      h = {}
      header_fields.each_with_index {|f, i|
        h[f] = ary[i]
      }
      t.insert(h)
    }
    t
  end

end
@@ -0,0 +1,213 @@
1
+ # lib/tb/reader.rb - Tb::Reader class
2
+ #
3
+ # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
class Tb
  # Reads the file +filename+ and parses its content with Tb.parse_csv.
  # +header_fields+ and the block are passed through unchanged.
  def Tb.load_csv(filename, *header_fields, &block)
    Tb.parse_csv(File.read(filename), *header_fields, &block)
  end

  # Parses +csv+ using csv_stream_input and returns a new Tb object.
  #
  # If a block is given, it receives the array of all parsed rows and its
  # result is used as the rows instead.
  #
  # If +header_fields+ is empty, the header is obtained from the rows via
  # Tb::Reader; otherwise +header_fields+ is the header and every row is data.
  def Tb.parse_csv(csv, *header_fields)
    aa = []
    csv_stream_input(csv) {|ary|
      aa << ary
    }
    aa = yield aa if block_given?
    table_from_parsed_rows(aa, header_fields)
  end

  # Reads the file +filename+ and parses its content with Tb.parse_tsv.
  # +header_fields+ and the block are passed through unchanged.
  def Tb.load_tsv(filename, *header_fields, &block)
    Tb.parse_tsv(File.read(filename), *header_fields, &block)
  end

  # Parses +tsv+ using tsv_stream_input and returns a new Tb object.
  #
  # The block and +header_fields+ are handled in the same way as in
  # Tb.parse_csv.
  def Tb.parse_tsv(tsv, *header_fields)
    aa = []
    tsv_stream_input(tsv) {|ary|
      aa << ary
    }
    aa = yield aa if block_given?
    table_from_parsed_rows(aa, header_fields)
  end

  # Builds a Tb object from the parsed rows +aa+.
  #
  # Shared by parse_csv and parse_tsv.  When +header_fields+ is empty the
  # rows are run through Tb::Reader, which supplies the header; otherwise
  # +header_fields+ is used as is.  Rows shorter than the header are padded
  # with nil (in place) before insertion.
  def Tb.table_from_parsed_rows(aa, header_fields) # :nodoc:
    if header_fields.empty?
      reader = Tb::Reader.new(aa)
      arys = []
      reader.each {|ary|
        arys << ary
      }
      # The header is asked for after all rows were read, as before.
      header = reader.header
    else
      header = header_fields
      arys = aa
    end
    t = Tb.new(header)
    arys.each {|ary|
      ary << nil while ary.length < header.length
      t.insert_values header, ary
    }
    t
  end
  private_class_method :table_from_parsed_rows
end
86
+
87
class Tb
  # Tb::Reader wraps a "raw reader" (any object with +shift+ returning the
  # next row as an array or nil at the end, and +close+) and keeps track of
  # the header fields of the rows read from it.
  #
  # With the option :numeric => true no header row is consumed; fields are
  # named "1", "2", ... by their position instead.
  class Reader
    # Opens +filename+ and returns a Tb::Reader for it.
    #
    # The format is chosen as follows:
    # - "csv:PATH" / "tsv:PATH": PATH is opened as CSV / TSV.  The explicit
    #   prefix takes precedence over the file extension, so "tsv:data.csv"
    #   reads data.csv as TSV.
    # - a name ending in ".csv" / ".tsv": opened as CSV / TSV.
    # - "-": STDIN is read as CSV.
    # - anything else: opened as CSV.
    #
    # If a block is given, the reader is yielded, closed afterwards (even if
    # the block raises), and the block's value is returned.
    def self.open(filename, opts={})
      case filename
      when /\Acsv:/
        rawreader = Tb::CSVReader.new(File.open($'))
      when /\Atsv:/
        rawreader = Tb::TSVReader.new(File.open($'))
      when /\.csv\z/
        rawreader = Tb::CSVReader.new(File.open(filename))
      when /\.tsv\z/
        rawreader = Tb::TSVReader.new(File.open(filename))
      else
        if filename == '-'
          rawreader = Tb::CSVReader.new(STDIN)
        else
          # guess table format?
          rawreader = Tb::CSVReader.new(File.open(filename))
        end
      end
      reader = self.new(rawreader, opts)
      return reader unless block_given?
      begin
        yield reader
      ensure
        reader.close
      end
    end

    # +rawreader+ supplies the rows.  +opts+[:numeric] selects the numeric
    # field naming described in the class comment.
    def initialize(rawreader, opts={})
      @opt_n = opts[:numeric]
      @reader = rawreader
      @fieldset = nil
    end

    # Returns the header (as reported by the underlying Tb::FieldSet).
    #
    # On the first call in non-numeric mode, rows are consumed from the raw
    # reader until one is found that is not entirely nil/''; that row becomes
    # the header.  If there is no such row, or in numeric mode, the field set
    # starts out empty.
    def header
      return @fieldset.header if @fieldset
      unless @opt_n
        while ary = @reader.shift
          next if ary.all? {|elt| elt.nil? || elt == '' }
          @fieldset = Tb::FieldSet.new(*ary)
          return @fieldset.header
        end
      end
      @fieldset = Tb::FieldSet.new
      @fieldset.header
    end

    # Returns the zero-based index of the field +f+.
    #
    # In numeric mode +f+ must be a string of decimal digits denoting a
    # one-based position; RuntimeError is raised for zero or for anything
    # which is not a number.  Otherwise the lookup is delegated to the
    # field set.
    def index_from_field(f)
      self.header
      return @fieldset.index_from_field(f) unless @opt_n
      raise "numeric field start from 1: #{f.inspect}" if /\A0+\z/ =~ f
      raise "numeric field name expected: #{f.inspect}" if /\A(\d+)\z/ !~ f
      # $1 is set by the successful match in the line above.
      $1.to_i - 1
    end

    # Returns the field for the index +i+ via the field set's
    # +field_from_index_ex+.  In numeric mode the field set is first extended
    # with the names "N+1".."i+1" when it is not long enough.
    #
    # Raises ArgumentError if +i+ is negative.
    def field_from_index_ex(i)
      raise ArgumentError, "negative index: #{i}" if i < 0
      self.header
      if @opt_n && @fieldset.length <= i
        @fieldset.add_fields(*(@fieldset.header.length..i).to_a.map {|j| "#{j+1}" })
      end
      @fieldset.field_from_index_ex(i)
    end

    # Returns the field for the index +i+.  In numeric mode this is simply
    # the one-based position as a string.
    #
    # Raises ArgumentError if +i+ is negative.
    def field_from_index(i)
      raise ArgumentError, "negative index: #{i}" if i < 0
      self.header
      return "#{i+1}" if @opt_n
      @fieldset.field_from_index(i)
    end

    # Returns the next data row, or nil at the end of input.  The header is
    # read first if that has not happened yet.
    def shift
      header
      ary = @reader.shift
      # Touch the last column so that the field set covers the whole row.
      field_from_index_ex(ary.length-1) if ary && !ary.empty?
      ary
    end

    # Yields every remaining data row.  Returns nil.
    def each
      while ary = self.shift
        yield ary
      end
      nil
    end

    # Closes the underlying raw reader.
    def close
      @reader.close
    end

    # Returns a copy of +header+ in which nil is replaced by '' and repeated
    # names are made unique by a "(N)" suffix: the second "a" becomes "a(2)",
    # and an already taken suffix is incremented until the name is free.
    def fix_header(header)
      taken = {}
      header.map {|s|
        s ||= ''
        if taken[s]
          s += "(2)" if /\(\d+\)\z/ !~ s
          while taken[s]
            s = s.sub(/\((\d+)\)\z/) { n = $1.to_i; "(#{n+1})" }
          end
        end
        taken[s] = true
        s
      }
    end
  end
end
@@ -0,0 +1,129 @@
1
+ # lib/tb/record.rb - record class for table library
2
+ #
3
+ # Copyright (C) 2010-2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
class Tb
  # Tb::Record is a view of one record of a table: it holds the table and a
  # record id, and every access is forwarded to the table.
  #
  # Cells whose value is nil are treated as absent by the conversion and
  # iteration methods.
  class Record
    include Enumerable

    # +table+ is the table this record belongs to, +recordid+ identifies the
    # record within it.
    def initialize(table, recordid)
      @table = table
      @recordid = recordid
    end
    attr_reader :table

    # Returns the record id given to the constructor.
    def record_id
      @recordid
    end

    # Pretty-prints the record as "field=>value" pairs, omitting fields whose
    # value is nil.
    def pretty_print(q) # :nodoc:
      q.object_group(self) {
        fs = @table.list_fields.reject {|f| self[f].nil? }
        unless fs.empty?
          q.text ':'
          q.breakable
        end
        q.seplist(fs, nil, :each) {|f|
          v = self[f]
          q.group {
            q.pp f
            q.text '=>'
            q.group(1) {
              q.breakable ''
              q.pp v
            }
          }
        }
      }
    end
    alias inspect pretty_print_inspect # :nodoc:

    # Asks the table whether it has the field +field+.
    def has_field?(field)
      @table.has_field?(field)
    end

    # Returns the value of +field+ in this record.
    def [](field)
      @table.get_cell(@recordid, field)
    end

    # Sets the value of +field+ in this record to +value+.
    def []=(field, value)
      @table.set_cell(@recordid, field, value)
    end

    # Returns a hash field=>value of the non-nil cells, for the fields
    # enumerated by the table's +each_field+.
    def to_h
      h = {}
      each {|f, v| h[f] = v }
      h
    end

    # Like to_h, but for the fields enumerated by the table's
    # +each_field_with_reserved+.
    def to_h_with_reserved
      h = {}
      each_with_reserved {|f, v| h[f] = v }
      h
    end

    # Returns an array of [field, value] pairs of the non-nil cells, for the
    # fields enumerated by the table's +each_field+.
    def to_a
      a = []
      each {|pair| a << pair }
      a
    end

    # Like to_a, but for the fields enumerated by the table's
    # +each_field_with_reserved+.
    def to_a_with_reserved
      a = []
      each_with_reserved {|pair| a << pair }
      a
    end

    # Yields a [field, value] pair for every non-nil cell, for the fields
    # enumerated by the table's +each_field+.  Returns nil.
    def each
      @table.each_field {|f|
        v = @table.get_cell(@recordid, f)
        yield [f, v] if !v.nil?
      }
      nil
    end

    # Like each, but for the fields enumerated by the table's
    # +each_field_with_reserved+ (the same iterator the other
    # *_with_reserved methods of this class use).  Returns nil.
    def each_with_reserved
      @table.each_field_with_reserved {|f|
        v = @table.get_cell(@recordid, f)
        yield [f, v] if !v.nil?
      }
      nil
    end

    # Returns the values of the given +fields+, in the given order.
    def values_at(*fields)
      fields.map {|f| self[f] }
    end
  end
end
@@ -0,0 +1,93 @@
1
+ # lib/tb/tsv.rb - TSV related features for table library
2
+ #
3
+ # Copyright (C) 2010-2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
+ require 'stringio'
28
+
29
class Tb
  # Reads +tsv+ (a String or an object with +gets+) through a TSVReader and
  # yields each line as an array of cells.  Returns nil.
  def Tb.tsv_stream_input(tsv)
    tsvreader = TSVReader.new(tsv)
    while ary = tsvreader.shift
      yield ary
    end
    nil
  end

  # TSVReader reads tab separated values line by line.
  class TSVReader
    # +input+ is either string-like (responds to +to_str+), in which case it
    # is wrapped in a StringIO, or an object used directly; +gets+ and
    # +close+ are called on it.
    def initialize(input)
      @input = input.respond_to?(:to_str) ? StringIO.new(input) : input
    end

    # Returns the cells of the next line as an array of strings, or nil at
    # the end of the input.  The line terminator is removed and trailing
    # empty cells are kept (split limit -1).
    def shift
      line = @input.gets
      return nil if !line
      line.chomp("\n").chomp("\r").split(/\t/, -1)
    end

    # Closes the underlying input.
    def close
      @input.close
    end
  end

  # Yields a generator object to the block.  Each array appended to the
  # generator with << is written to +out+ as one TSV line (using
  # Tb.tsv_fields_join) followed by "\n".
  #
  # << returns the generator itself so that rows can be chained
  # (gen << row1 << row2) and every row goes through the TSV formatting.
  #
  # Returns the value of the block.
  def Tb.tsv_stream_output(out)
    gen = Object.new
    gen.instance_variable_set(:@out, out)
    def gen.<<(ary)
      @out << Tb.tsv_fields_join(ary) << "\n"
      self
    end
    yield gen
  end

  # :call-seq:
  #   generate_tsv(out='', fields=nil) {|recordids| modified_recordids }
  #   generate_tsv(out='', fields=nil)
  #
  # Appends the table as TSV to +out+ and returns +out+: first a header line
  # with +fields+ (all fields of list_fields if nil), then one line per
  # record id.  If a block is given, it receives the list of record ids and
  # its result is used instead.
  def generate_tsv(out='', fields=nil, &block)
    fields = list_fields if fields.nil?
    recordids = list_recordids
    recordids = yield(recordids) if block_given?
    Tb.tsv_stream_output(out) {|gen|
      gen << fields
      recordids.each {|recordid|
        gen << get_values(recordid, *fields)
      }
    }
    out
  end

  # Joins +values+ into one TSV line (without terminator).  Each value is
  # converted with +to_s+, and tab, CR and LF characters inside a value are
  # replaced by a space.
  def Tb.tsv_fields_join(values)
    values.map {|v| v.to_s.gsub(/[\t\r\n]/, ' ') }.join("\t")
  end
end