tb 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,93 @@
1
+ # lib/tb/qtsv.rb - quoted TSV related features for table library
2
+ #
3
+ # Copyright (C) 2010 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
class Tb

  # quoted TSV is a variant of TSV (tab separated value)
  #
  # All non-empty values are quoted by double-quotes.
  # An unquoted (empty) value is read as nil.

  # Reads the file +filename+ and parses its content with Tb.parse_qtsv.
  #
  # +header_fields+ and the optional block are passed to Tb.parse_qtsv
  # unchanged.
  def Tb.load_qtsv(filename, *header_fields, &block)
    Tb.parse_qtsv(File.read(filename), *header_fields, &block)
  end

  # Scans quoted TSV text and yields each row as an array of cells.
  #
  # +qtsv+ is a String, or any other object whose +read+ method returns
  # the text.  A quoted cell is yielded as the text between its quotes;
  # an unquoted (empty) cell is yielded as nil.  A row ends at "\n",
  # "\r\n" or at the end of the input.
  #
  # Raises RuntimeError when the scan does not consume the whole input.
  # Returns nil.
  def Tb.qtsv_stream_input(qtsv)
    qtsv = qtsv.read unless String === qtsv
    qtsv = qtsv.dup
    cells = []
    # Everything the scan consumed; compared with the input afterwards to
    # detect text that the pattern could not match.
    verify = ''.dup
    qtsv.scan(/\G("(.*?)"|)(\t|\n|\r\n|\z)/m) {
      verify << $&
      cell = $2
      sep = $3
      if cell.nil? && sep.empty?
        # Empty match at the end of the input.  When the input ends right
        # after a tab, the cells collected so far form a last row whose
        # final cell is empty; emit it instead of dropping it.
        unless cells.empty?
          cells << cell
          yield cells
        end
        break
      end
      cells << cell
      if sep != "\t"
        yield cells
        cells = []
      end
    }
    if verify != qtsv
      if qtsv.start_with?(verify)
        raise "unexpected scan ('verify' is a prefix of 'qtsv')"
      end
      if verify.length != qtsv.length
        raise "unexpected scan (length differ: orig:#{qtsv.length} verify:#{verify.length})"
      end
      raise "unexpected scan"
    end
    nil
  end

  # Parses quoted TSV text into a table built with Tb.new and Tb#insert.
  #
  # When a block is given it receives the array of all rows and its
  # result is used as the rows instead.
  #
  # When +header_fields+ is empty, leading rows whose cells are all nil or
  # '' are skipped and the next row is used as the header.  Input without
  # any such row gives a table with an empty header.
  #
  # Raises ArgumentError when the header read from the input contains the
  # same field twice.
  def Tb.parse_qtsv(qtsv, *header_fields)
    aa = []
    qtsv_stream_input(qtsv) {|ary|
      aa << ary
    }
    aa = yield aa if block_given?
    if header_fields.empty?
      # Stop when the rows run out, so empty input does not call a
      # method on nil.
      aa.shift while (row = aa.first) && row.all? {|elt| elt.nil? || elt == '' }
      header_fields = aa.shift || []
      h = {}
      header_fields.each_with_index {|f, i|
        if h.include? f
          raise ArgumentError, "ambiguous header field: #{f.inspect} (#{h[f]}th and #{i}th)"
        end
        h[f] = i
      }
    end
    t = Tb.new(header_fields)
    aa.each {|ary|
      h = {}
      header_fields.each_with_index {|f, i|
        h[f] = ary[i]
      }
      t.insert(h)
    }
    t
  end

end
@@ -0,0 +1,213 @@
1
+ # lib/tb/reader.rb - Tb::Reader class
2
+ #
3
+ # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
class Tb
  # Reads the file +filename+ and parses its content with Tb.parse_csv.
  #
  # +header_fields+ and the optional block are passed to Tb.parse_csv
  # unchanged.
  def Tb.load_csv(filename, *header_fields, &block)
    Tb.parse_csv(File.read(filename), *header_fields, &block)
  end

  # Parses CSV text into a table.
  #
  # The rows are collected from csv_stream_input.  When a block is given
  # it receives the array of all rows and its result is used as the rows
  # instead.
  #
  # When +header_fields+ is empty the header is obtained through
  # Tb::Reader; otherwise +header_fields+ is the header and every row is
  # a record.
  def Tb.parse_csv(csv, *header_fields)
    aa = []
    csv_stream_input(csv) {|ary|
      aa << ary
    }
    aa = yield aa if block_given?
    table_from_arrays(aa, header_fields)
  end

  # Reads the file +filename+ and parses its content with Tb.parse_tsv.
  #
  # +header_fields+ and the optional block are passed to Tb.parse_tsv
  # unchanged.
  def Tb.load_tsv(filename, *header_fields, &block)
    Tb.parse_tsv(File.read(filename), *header_fields, &block)
  end

  # Parses TSV text into a table.
  #
  # Works like Tb.parse_csv except that the rows are collected from
  # tsv_stream_input.
  def Tb.parse_tsv(tsv, *header_fields)
    aa = []
    tsv_stream_input(tsv) {|ary|
      aa << ary
    }
    aa = yield aa if block_given?
    table_from_arrays(aa, header_fields)
  end

  # Builds a table from +aa+, an array of rows (arrays of cells).
  #
  # Shared by Tb.parse_csv and Tb.parse_tsv.  With an empty
  # +header_fields+, the rows are read through Tb::Reader and its header
  # is used; otherwise +header_fields+ is the header.  Rows shorter than
  # the header are padded with nil in place before insertion.
  def Tb.table_from_arrays(aa, header_fields) # :nodoc:
    if header_fields.empty?
      reader = Tb::Reader.new(aa)
      arys = []
      reader.each {|ary|
        arys << ary
      }
      # Asked for after reading all rows, as the original code did.
      header = reader.header
    else
      header = header_fields
      arys = aa
    end
    t = Tb.new(header)
    arys.each {|ary|
      ary << nil while ary.length < header.length
      t.insert_values header, ary
    }
    t
  end
  private_class_method :table_from_arrays
end
86
+
87
class Tb
  # Tb::Reader wraps a raw row reader and keeps track of the header fields
  # of the table being read.
  #
  # The raw reader is any object whose +shift+ returns the next row as an
  # array, or nil at the end (Tb.parse_csv passes a plain Array of rows).
  # #close additionally needs it to respond to +close+.
  class Reader
    # :call-seq:
    #   Tb::Reader.open(filename, opts={}) {|reader| ... }
    #   Tb::Reader.open(filename, opts={})
    #
    # Opens +filename+ and wraps it in a reader.
    #
    # The format is chosen as follows, first match wins:
    #
    # * "csv:PATH" / "tsv:PATH" : PATH is opened as CSV / TSV.
    # * a name ending in ".csv" / ".tsv" : opened as CSV / TSV.
    # * "-" : STDIN is read as CSV.
    # * anything else : opened as CSV.
    #
    # The prefixes are tested before the extensions so that an explicit
    # prefix overrides the extension (e.g. "tsv:data.csv").
    #
    # With a block, the reader is yielded, closed afterwards, and the
    # block's value is returned.  Without a block the reader is returned.
    def self.open(filename, opts={})
      rawreader =
        case filename
        when /\Acsv:/
          Tb::CSVReader.new(File.open(Regexp.last_match.post_match))
        when /\Atsv:/
          Tb::TSVReader.new(File.open(Regexp.last_match.post_match))
        when /\.csv\z/
          Tb::CSVReader.new(File.open(filename))
        when /\.tsv\z/
          Tb::TSVReader.new(File.open(filename))
        when '-'
          Tb::CSVReader.new(STDIN)
        else
          # guess table format?
          Tb::CSVReader.new(File.open(filename))
        end
      reader = self.new(rawreader, opts)
      if block_given?
        begin
          yield reader
        ensure
          reader.close
        end
      else
        reader
      end
    end

    # +rawreader+ supplies the rows.  When <tt>opts[:numeric]</tt> is true
    # no header row is read and fields are named "1", "2", ...
    def initialize(rawreader, opts={})
      @opt_n = opts[:numeric]
      @reader = rawreader
      @fieldset = nil
    end

    # Returns the header of the field set.
    #
    # On first use the field set is created: empty in numeric mode,
    # otherwise from the first row that has a cell other than nil or ''
    # (rows before it are consumed and discarded).  If there is no such
    # row the field set is empty.
    def header
      return @fieldset.header if @fieldset
      if @opt_n
        @fieldset = Tb::FieldSet.new
      else
        while ary = @reader.shift
          next if ary.all? {|elt| elt.nil? || elt == '' }
          @fieldset = Tb::FieldSet.new(*ary)
          return @fieldset.header
        end
        @fieldset = Tb::FieldSet.new
      end
      return @fieldset.header
    end

    # Returns the zero-based index for field +f+.
    #
    # In numeric mode +f+ must be a string of decimal digits naming a
    # one-based position; RuntimeError is raised for "0" (or all zeros)
    # and for anything that is not all digits.  Otherwise the lookup is
    # delegated to the field set.
    def index_from_field(f)
      self.header
      if @opt_n
        raise "numeric field start from 1: #{f.inspect}" if /\A0+\z/ =~ f
        raise "numeric field name expected: #{f.inspect}" if /\A(\d+)\z/ !~ f
        # $1 is the digit string captured by the match just above.
        $1.to_i - 1
      else
        @fieldset.index_from_field(f)
      end
    end

    # Returns the field for index +i+ via the field set's
    # +field_from_index_ex+.  In numeric mode the field set is first
    # extended with numbered fields up to +i+.
    #
    # Raises ArgumentError for a negative +i+.
    def field_from_index_ex(i)
      raise ArgumentError, "negative index: #{i}" if i < 0
      self.header
      if @opt_n
        if @fieldset.length <= i
          @fieldset.add_fields(*(@fieldset.header.length..i).to_a.map {|j| "#{j+1}" })
        end
      end
      @fieldset.field_from_index_ex(i)
    end

    # Returns the field for index +i+: the one-based number as a string
    # in numeric mode, otherwise whatever the field set returns.
    #
    # Raises ArgumentError for a negative +i+.
    def field_from_index(i)
      raise ArgumentError, "negative index: #{i}" if i < 0
      self.header
      if @opt_n
        return "#{i+1}"
      end
      @fieldset.field_from_index(i)
    end

    # Returns the next row, or nil at the end of the input.
    #
    # The header is read first if it has not been yet.  For a non-empty
    # row, field_from_index_ex is called with the row's last index.
    def shift
      header
      ary = @reader.shift
      field_from_index_ex(ary.length-1) if ary && !ary.empty?
      ary
    end

    # Yields each remaining row.  Returns nil.
    def each
      while ary = self.shift
        yield ary
      end
      nil
    end

    # Closes the raw reader.
    def close
      @reader.close
    end

    # Returns a copy of +header+ in which nil is replaced by '' and
    # repeated names are made unique by a "(2)", "(3)", ... suffix.
    def fix_header(header)
      h = {}
      header.map {|s|
        s ||= ''
        if h[s]
          s += "(2)" if /\(\d+\)\z/ !~ s
          while h[s]
            s = s.sub(/\((\d+)\)\z/) { n = $1.to_i; "(#{n+1})" }
          end
          s
        end
        h[s] = true
        s
      }
    end
  end
end
@@ -0,0 +1,129 @@
1
+ # lib/tb/record.rb - record class for table library
2
+ #
3
+ # Copyright (C) 2010-2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
class Tb
  # Tb::Record is a view of one record of a table: it stores the table
  # and a record id, and every accessor is forwarded to the table.
  class Record
    include ::Enumerable

    # +table+ is the table the record belongs to and +recordid+
    # identifies the record in it.
    def initialize(table, recordid)
      @table = table
      @recordid = recordid
    end
    attr_reader :table

    # Returns the record id given to the constructor.
    def record_id
      @recordid
    end

    def pretty_print(q) # :nodoc:
      q.object_group(self) {
        # Only fields with a non-nil value are shown.
        fs = @table.list_fields.reject {|f| self[f].nil? }
        unless fs.empty?
          q.text ':'
          q.breakable
        end
        q.seplist(fs, nil, :each) {|f|
          v = self[f]
          q.group {
            q.pp f
            q.text '=>'
            q.group(1) {
              q.breakable ''
              q.pp v
            }
          }
        }
      }
    end
    alias inspect pretty_print_inspect # :nodoc:

    # Returns what the table's +has_field?+ returns for +field+.
    def has_field?(field)
      @table.has_field?(field)
    end

    # Returns the value of +field+ in this record.
    def [](field)
      @table.get_cell(@recordid, field)
    end

    # Sets the value of +field+ in this record to +value+.
    def []=(field, value)
      @table.set_cell(@recordid, field, value)
    end

    # Returns a hash from field to value, for the fields enumerated by
    # the table's +each_field+ whose value is not nil.
    def to_h
      h = {}
      @table.each_field {|f|
        v = @table.get_cell(@recordid, f)
        h[f] = v if !v.nil?
      }
      h
    end

    # Like #to_h but enumerates the fields with the table's
    # +each_field_with_reserved+.
    def to_h_with_reserved
      h = {}
      @table.each_field_with_reserved {|f|
        v = @table.get_cell(@recordid, f)
        h[f] = v if !v.nil?
      }
      h
    end

    # Returns an array of [field, value] pairs, for the fields enumerated
    # by the table's +each_field+ whose value is not nil.
    def to_a
      # Must be an Array: the pairs are appended with <<.
      a = []
      @table.each_field {|f|
        v = @table.get_cell(@recordid, f)
        a << [f, v] if !v.nil?
      }
      a
    end

    # Like #to_a but enumerates the fields with the table's
    # +each_field_with_reserved+.
    def to_a_with_reserved
      a = []
      @table.each_field_with_reserved {|f|
        v = @table.get_cell(@recordid, f)
        a << [f, v] if !v.nil?
      }
      a
    end

    # Yields [field, value] for the fields enumerated by the table's
    # +each_field+ whose value is not nil.  Returns nil.
    def each
      @table.each_field {|f|
        v = @table.get_cell(@recordid, f)
        yield [f, v] if !v.nil?
      }
      nil
    end

    # Like #each but enumerates the fields with the table's
    # +each_field_with_reserved+, the same enumerator the other
    # *_with_reserved methods use.
    def each_with_reserved
      @table.each_field_with_reserved {|f|
        v = @table.get_cell(@recordid, f)
        yield [f, v] if !v.nil?
      }
      nil
    end

    # Returns the values of the given fields, in the given order.
    def values_at(*fields)
      fields.map {|f| self[f] }
    end
  end
end
@@ -0,0 +1,93 @@
1
+ # lib/tb/tsv.rb - TSV related features for table library
2
+ #
3
+ # Copyright (C) 2010-2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
+ require 'stringio'
28
+
29
class Tb
  # Yields every row of +tsv+ as an array of cell strings, reading it
  # through a TSVReader.  Returns nil.
  def Tb.tsv_stream_input(tsv)
    reader = TSVReader.new(tsv)
    row = reader.shift
    while row
      yield row
      row = reader.shift
    end
    nil
  end

  # Line-oriented reader for tab separated text.
  class TSVReader
    # +input+ is wrapped in a StringIO when it responds to +to_str+;
    # any other object is used as it is and is read with +gets+.
    def initialize(input)
      @input = input.respond_to?(:to_str) ? StringIO.new(input) : input
    end

    # Returns the cells of the next line, or nil when +gets+ returns
    # nothing.  The line terminator is removed and the rest is split on
    # tabs; trailing empty cells are kept.
    def shift
      raw = @input.gets
      return nil unless raw
      raw.chomp("\n").chomp("\r").split(/\t/, -1)
    end

    # Closes the underlying input.
    def close
      @input.close
    end
  end

  # Yields a generator object; each array appended to it with << is
  # written to +out+ as one TSV line (Tb.tsv_fields_join plus "\n").
  # Returns the value of the block.
  def Tb.tsv_stream_output(out)
    writer = Object.new
    writer.instance_variable_set(:@out, out)
    class << writer
      def <<(ary)
        @out << Tb.tsv_fields_join(ary) << "\n"
      end
    end
    yield writer
  end

  # :call-seq:
  #   generate_tsv(out='', fields=nil) {|recordids| modified_recordids }
  #   generate_tsv(out='', fields=nil)
  #
  # Appends the table to +out+ as TSV: a header line with +fields+
  # (list_fields when nil), then one line per record id.  The record ids
  # come from list_recordids and are replaced by the block's result when
  # a block is given.  Returns +out+.
  def generate_tsv(out='', fields=nil, &block)
    fields = list_fields if fields.nil?
    recordids = list_recordids
    recordids = yield(recordids) if block_given?
    Tb.tsv_stream_output(out) do |gen|
      gen << fields
      recordids.each do |recordid|
        gen << get_values(recordid, *fields)
      end
    end
    out
  end

  # Joins +values+ into one TSV line without a terminator.  Each value
  # is converted with +to_s+ and every tab, CR and LF in it is replaced
  # by a space.
  def Tb.tsv_fields_join(values)
    cells = values.map do |v|
      v.to_s.gsub(/[\t\r\n]/, ' ')
    end
    cells.join("\t")
  end
end