tb 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/README +156 -5
  2. data/bin/tb +2 -1110
  3. data/lib/tb.rb +4 -2
  4. data/lib/tb/catreader.rb +131 -0
  5. data/lib/tb/cmd_cat.rb +65 -0
  6. data/lib/tb/cmd_consecutive.rb +79 -0
  7. data/lib/tb/cmd_crop.rb +105 -0
  8. data/lib/tb/cmd_cross.rb +119 -0
  9. data/lib/tb/cmd_csv.rb +42 -0
  10. data/lib/tb/cmd_cut.rb +77 -0
  11. data/lib/tb/cmd_grep.rb +76 -0
  12. data/lib/tb/cmd_group.rb +82 -0
  13. data/lib/tb/cmd_gsub.rb +77 -0
  14. data/lib/tb/cmd_help.rb +98 -0
  15. data/lib/tb/cmd_join.rb +81 -0
  16. data/lib/tb/cmd_json.rb +60 -0
  17. data/lib/tb/cmd_ls.rb +273 -0
  18. data/lib/tb/cmd_mheader.rb +77 -0
  19. data/lib/tb/cmd_newfield.rb +59 -0
  20. data/lib/tb/cmd_pnm.rb +43 -0
  21. data/lib/tb/cmd_pp.rb +70 -0
  22. data/lib/tb/cmd_rename.rb +58 -0
  23. data/lib/tb/cmd_shape.rb +67 -0
  24. data/lib/tb/cmd_sort.rb +58 -0
  25. data/lib/tb/cmd_svn_log.rb +158 -0
  26. data/lib/tb/cmd_tsv.rb +43 -0
  27. data/lib/tb/cmd_yaml.rb +47 -0
  28. data/lib/tb/cmdmain.rb +45 -0
  29. data/lib/tb/cmdtop.rb +58 -0
  30. data/lib/tb/cmdutil.rb +327 -0
  31. data/lib/tb/csv.rb +30 -6
  32. data/lib/tb/fieldset.rb +39 -41
  33. data/lib/tb/pager.rb +132 -0
  34. data/lib/tb/pnm.rb +357 -0
  35. data/lib/tb/reader.rb +18 -128
  36. data/lib/tb/record.rb +3 -3
  37. data/lib/tb/ropen.rb +70 -0
  38. data/lib/tb/{pathfinder.rb → search.rb} +69 -34
  39. data/lib/tb/tsv.rb +29 -1
  40. data/sample/colors.ppm +0 -0
  41. data/sample/gradation.pgm +0 -0
  42. data/sample/langs.csv +46 -0
  43. data/sample/tbplot +293 -0
  44. data/test-all-cov.rb +65 -0
  45. data/test-all.rb +5 -0
  46. data/test/test_basic.rb +99 -2
  47. data/test/test_catreader.rb +27 -0
  48. data/test/test_cmd_cat.rb +118 -0
  49. data/test/test_cmd_consecutive.rb +90 -0
  50. data/test/test_cmd_crop.rb +101 -0
  51. data/test/test_cmd_cross.rb +113 -0
  52. data/test/test_cmd_csv.rb +129 -0
  53. data/test/test_cmd_cut.rb +100 -0
  54. data/test/test_cmd_grep.rb +89 -0
  55. data/test/test_cmd_group.rb +181 -0
  56. data/test/test_cmd_gsub.rb +103 -0
  57. data/test/test_cmd_help.rb +190 -0
  58. data/test/test_cmd_join.rb +197 -0
  59. data/test/test_cmd_json.rb +75 -0
  60. data/test/test_cmd_ls.rb +203 -0
  61. data/test/test_cmd_mheader.rb +86 -0
  62. data/test/test_cmd_newfield.rb +63 -0
  63. data/test/test_cmd_pnm.rb +35 -0
  64. data/test/test_cmd_pp.rb +62 -0
  65. data/test/test_cmd_rename.rb +91 -0
  66. data/test/test_cmd_shape.rb +50 -0
  67. data/test/test_cmd_sort.rb +105 -0
  68. data/test/test_cmd_tsv.rb +67 -0
  69. data/test/test_cmd_yaml.rb +55 -0
  70. data/test/test_cmdtty.rb +154 -0
  71. data/test/test_cmdutil.rb +43 -0
  72. data/test/test_csv.rb +10 -0
  73. data/test/test_fieldset.rb +42 -0
  74. data/test/test_pager.rb +142 -0
  75. data/test/test_pnm.rb +374 -0
  76. data/test/test_reader.rb +147 -0
  77. data/test/test_record.rb +49 -0
  78. data/test/test_search.rb +575 -0
  79. data/test/test_tsv.rb +7 -0
  80. metadata +108 -5
  81. data/lib/tb/qtsv.rb +0 -93
@@ -27,6 +27,35 @@
27
27
  require 'stringio'
28
28
 
29
29
  class Tb
30
# Reads the whole file +filename+ and parses its content as TSV.
#
# +header_fields+ and the optional block are passed through unchanged to
# Tb.parse_tsv, whose result is returned.
def Tb.load_tsv(filename, *header_fields, &block)
  content = File.read(filename)
  Tb.parse_tsv(content, *header_fields, &block)
end
33
+
34
# Parses the TSV text +tsv+ and returns a Tb table.
#
# If a block is given, it receives the array of parsed rows and its
# return value replaces that array before the table is built.
#
# When +header_fields+ is empty the header is obtained from a Tb::Reader
# constructed over the rows; otherwise +header_fields+ is used as the
# header and every parsed row is treated as data.
def Tb.parse_tsv(tsv, *header_fields)
  rows = []
  tsv_stream_input(tsv) {|fields| rows << fields }
  rows = yield rows if block_given?
  if header_fields.empty?
    reader = Tb::Reader.new(rows)
    records = []
    reader.each {|fields| records << fields }
    # The header is asked for only after the records have been consumed,
    # in the same order as before.
    header = reader.header
  else
    header, records = header_fields, rows
  end
  table = Tb.new(header)
  records.each {|fields|
    # Pad short rows with nil so that they are as long as the header.
    (header.length - fields.length).times { fields << nil }
    table.insert_values header, fields
  }
  table
end
58
+
30
59
  def Tb.tsv_stream_input(tsv)
31
60
  tsvreader = TSVReader.new(tsv)
32
61
  while ary = tsvreader.shift
@@ -53,7 +82,6 @@ class Tb
53
82
  end
54
83
 
55
84
  def close
56
- @input.close
57
85
  end
58
86
  end
59
87
 
Binary file
Binary file
@@ -0,0 +1,46 @@
1
+ language,year
2
+ FORTRAN,1955
3
+ LISP,1958
4
+ COBOL,1959
5
+ ALGOL 58,1958
6
+ APL,1962
7
+ Simula,1962
8
+ SNOBOL,1962
9
+ BASIC,1964
10
+ PL/I,1964
11
+ BCPL,1967
12
+ Logo,1968
13
+ B,1969
14
+ Pascal,1970
15
+ Forth,1970
16
+ C,1972
17
+ Smalltalk,1972
18
+ Prolog,1972
19
+ ML,1973
20
+ Scheme,1975
21
+ SQL,1978
22
+ C++,1980
23
+ Objective-C,1983
24
+ Ada,1983
25
+ Common Lisp,1984
26
+ Eiffel,1985
27
+ Erlang,1986
28
+ Perl,1987
29
+ Tcl,1988
30
+ Haskell,1990
31
+ Python,1991
32
+ Visual Basic,1991
33
+ Ruby,1993
34
+ Lua,1993
35
+ CLOS,1994
36
+ Java,1995
37
+ Delphi,1995
38
+ JavaScript,1995
39
+ PHP,1995
40
+ D,1999
41
+ C#,2001
42
+ F#,2002
43
+ Groovy,2003
44
+ Scala,2003
45
+ Clojure,2007
46
+ Go,2009
@@ -0,0 +1,293 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
+ # plot a graph using gnuplot.
28
+
29
+ # usage:
30
+ # tbplot [-x field] [-y field] [--shape field] [--color field] [--size field] [--facet-x field] [--facet-y field] [--shapecolor field] [--max-size size] [filename ...]
31
+
32
+ $:.unshift '/home/akr/ruby/tb/lib'
33
+
34
+ require 'optparse'
35
+ require 'tempfile'
36
+ require 'time'
37
+ require 'tb'
38
+
39
# Prints +msg+ on standard error and terminates the process with a
# failure exit status.
def err(msg)
  STDERR.puts(msg)
  exit(false)
end
43
+
44
# Returns +string+ as a double-quoted gnuplot string literal.
#
# Every byte other than an ASCII letter is written as a three-digit
# octal escape (backslash followed by digits).  The argument itself is
# not modified: a binary-encoded copy is used when the Ruby in use
# supports String#force_encoding.
def gnuplot_escape_string(string)
  raw = string.respond_to?(:force_encoding) ? string.dup.force_encoding("ascii-8bit") : string
  escaped = raw.gsub(/[^A-Za-z]/) {|ch| "\\%03o" % ch.ord }
  '"' + escaped + '"'
end
48
+
49
# Collects statistics about the values seen for one field: how many are
# numeric, how many are timestamps, the range of each kind, and the set
# of distinct values (each numbered in order of first appearance).
class ValueChecker
  # Text that looks like a decimal number, with optional sign, fraction
  # and exponent, possibly surrounded by whitespace.
  NUMERIC_PATTERN = /\A\s*-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\s*\z/

  # Text that looks like a timestamp in extended (with dashes/colons) or
  # basic (compact) notation.
  TIME_PATTERN_EXTENDED = /\A\s*-?\d+-\d\d(-\d\d(T\d\d(:\d\d(:\d\d(\.\d+)?(Z|[+-]\d\d:?\d\d)?)?)?)?)?\s*\z/
  TIME_PATTERN_BASIC = /\A\s*-?\d+\d\d\d\dT\d\d(\d\d(\d\d(\.\d+)?(Z|[+-]\d\d:?\d\d)?)?)?\s*\z/

  def initialize
    # NOTE(review): @total is initialized and exposed but never updated
    # anywhere in this class -- confirm whether it should count checks.
    @total = 0
    @numeric = 0
    @numeric_min = @numeric_max = nil
    @time = 0
    @time_min = @time_max = nil
    @values = {}
  end
  attr_reader :total, :numeric, :time
  attr_reader :numeric_min, :numeric_max
  attr_reader :time_min, :time_max
  attr_reader :values

  # Returns the 1-based number assigned to +val+ by #check.
  # Raises (via Hash#fetch) if +val+ was never returned by #check.
  def categorical_index(val)
    @values.fetch(val)
  end

  # Records +val+ and returns the value to be plotted.
  #
  # * Numeric objects and numeric-looking strings update the numeric
  #   count and range, and are returned unchanged.
  # * Timestamp-looking strings update the time count and range, and
  #   are returned as an ISO 8601 string.
  # * Anything else is returned unchanged.
  #
  # The returned value is also registered as a category.
  def check(val)
    ret = val
    if val.kind_of? Numeric
      record_numeric(val)
    elsif NUMERIC_PATTERN =~ val
      # Track the range with the numeric value, not the text: comparing
      # strings would order "10" before "9" and could not be mixed with
      # Numeric values.  The text itself is still what gets returned.
      record_numeric(val.to_f)
    elsif TIME_PATTERN_EXTENDED =~ val || TIME_PATTERN_BASIC =~ val
      t = tweak_timezone(Time.parse(val).utc)
      record_time(t)
      ret = t.iso8601
    end
    @values[ret] ||= @values.size + 1
    ret
  end

  def tweak_timezone(t)
    # gnuplot supports UTC only.
    # So this (not-so-valid) tweak is required.
    unless defined? @time_offset
      @time_offset = Time.now.utc_offset
    end
    t + @time_offset
  end

  # Returns the tic label format for this field: a time format when any
  # timestamp was seen (hours and minutes if all timestamps fall on one
  # day, date otherwise), or a plain numeric format.
  def tic_format
    if 0 < @time
      if @time_min.year == @time_max.year &&
         @time_min.mon == @time_max.mon &&
         @time_min.day == @time_max.day
        "%H:%M"
      else
        "%Y\n%m-%d"
      end
    else
      "% g"
    end
  end

  private

  # Counts one numeric value and widens the numeric range to include it.
  def record_numeric(num)
    @numeric += 1
    if @numeric == 1
      @numeric_min = @numeric_max = num
    else
      @numeric_min = num if num < @numeric_min
      @numeric_max = num if @numeric_max < num
    end
  end

  # Counts one timestamp and widens the time range to include it.
  def record_time(t)
    @time += 1
    if @time == 1
      @time_min = @time_max = t
    else
      @time_min = t if t < @time_min
      @time_max = t if @time_max < t
    end
  end
end
125
+
126
# Fields chosen on the command line; nil means "not specified".
$x_field = nil
$y_field = nil
$shape_field = nil
$color_field = nil
$size_field = nil
$facet_x_field = nil
$facet_y_field = nil
# Maximum point size; nil until given with --max-size or defaulted below.
$max_size = nil

op = OptionParser.new
op.def_option('-x FIELD', 'x-field') do |field|
  $x_field = field
end
op.def_option('-y FIELD', 'y-field') do |field|
  $y_field = field
end
op.def_option('--shape=FIELD', 'shape-field') do |field|
  $shape_field = field
end
op.def_option('--color=FIELD', 'color-field') do |field|
  $color_field = field
end
op.def_option('--size=FIELD', 'size-field') do |field|
  $size_field = field
end
op.def_option('--facet-x=FIELD', 'facet-x-field') do |field|
  $facet_x_field = field
end
op.def_option('--facet-y=FIELD', 'facet-y-field') do |field|
  $facet_y_field = field
end
op.def_option('--shapecolor=FIELD', 'shape-field and color-field') do |field|
  $shape_field = $color_field = field
end
op.def_option('--max-size=MAX-SIZE', 'maximum point size') do |value|
  $max_size = value.to_f
end

op.parse!(ARGV)

# Default maximum point size: 100.0 when a size-field is used, 1.0 otherwise.
$max_size ||= ($size_field ? 100.0 : 1.0)
155
+
156
# Data files handed to gnuplot, keyed first by facet and then by series:
#   { [facet_x, facet_y] => { [shape, color] => Tempfile } }
tmps = {}

argv = ARGV.empty? ? ['-'] : ARGV
Tb::CatReader.open(argv) {|r|
  header = r.header.dup
  aux_fields = [$shape_field, $color_field, $size_field, $facet_x_field, $facet_y_field]

  # Every field named on the command line must exist in the input.
  ([$x_field, $y_field] + aux_fields).each {|f|
    if f && !header.include?(f)
      err("field not found: #{f.inspect}")
    end
  }

  # Fields not claimed by any option are candidates for the x and y axes,
  # in header order.
  header -= ([$x_field, $y_field] + aux_fields).compact
  if !$x_field
    if header.empty?
      err("x-field not found")
    end
    $x_field = header.shift
  end
  if !$y_field
    if header.empty?
      err("y-field not found")
    end
    $y_field = header.shift
  end

  checkers = {}
  uniq_fields = ([$x_field, $y_field] + aux_fields).compact.uniq
  r.each {|ary|
    # Records lacking a value for any plotted field are skipped.
    next if uniq_fields.any? {|f| ary[r.index_from_field(f)].nil? }
    vs = {}
    uniq_fields.each {|f|
      checkers[f] ||= ValueChecker.new
      vs[f] = checkers[f].check(ary[r.index_from_field(f)])
    }
    x = vs[$x_field]
    y = vs[$y_field]
    shape = $shape_field ? vs[$shape_field] : nil
    color = $color_field ? vs[$color_field] : nil
    size = $size_field ? vs[$size_field] : 1
    facet_x = $facet_x_field ? vs[$facet_x_field] : nil
    facet_y = $facet_y_field ? vs[$facet_y_field] : nil
    series = (tmps[[facet_x, facet_y]] ||= {})
    data_file = (series[[shape, color]] ||= Tempfile.new('tbplot'))
    data_file.puts "#{x} #{y} #{size}"
  }
  tmps.each {|k1, h| h.each {|k2, v| v.close } }

  # Without any usable record there are no checkers for the axes, and
  # the code below would fail on nil.
  if checkers.empty?
    err("no plottable record found")
  end

  gnuplot_command = ''
  if 0 < checkers[$x_field].time || 0 < checkers[$y_field].time
    gnuplot_command << 'set timefmt "%Y-%m-%dT%H:%M:%SZ"' << "\n"
  end
  if 0 < checkers[$x_field].time
    gnuplot_command << "set xdata time\n"
    gnuplot_command << "set format x #{gnuplot_escape_string(checkers[$x_field].tic_format)}\n"
  end
  if 0 < checkers[$y_field].time
    gnuplot_command << "set ydata time\n"
    gnuplot_command << "set format y #{gnuplot_escape_string(checkers[$y_field].tic_format)}\n"
  end
  gnuplot_command << "set xlabel #{gnuplot_escape_string($x_field)}\n"
  gnuplot_command << "set ylabel #{gnuplot_escape_string($y_field)}\n"
  if $size_field
    size_max = checkers[$size_field].numeric_max
    if size_max.nil?
      err("size-field is not numeric: #{$size_field.inspect}")
    end
    # to_f accepts both numbers and numeric text, so the division cannot
    # fail on a value that was read as a string.
    size_max = size_max.to_f
    # Scale points so that the largest value is drawn at $max_size; a
    # non-positive maximum would give a meaningless scale and is ignored.
    if 0 < size_max
      gnuplot_command << "set pointsize #{$max_size / size_max}\n"
    end
  end
  if $facet_x_field || $facet_y_field
    # Each facet gets an equal share of the canvas in each direction.
    xsize = $facet_x_field ? 1.0 / checkers[$facet_x_field].values.size : 1.0
    ysize = $facet_y_field ? 1.0 / checkers[$facet_y_field].values.size : 1.0
    gnuplot_command << "set multiplot\n"
    gnuplot_command << "set size #{xsize},#{ysize}\n"
  end
  tmps.each {|(facet_x, facet_y), h|
    if $facet_x_field || $facet_y_field
      # Facets go left to right along x and top to bottom along y, in
      # order of first appearance of the facet value.
      xorigin = $facet_x_field ? (checkers[$facet_x_field].categorical_index(facet_x)-1.0)/checkers[$facet_x_field].values.size : 0
      yorigin = $facet_y_field ? 1.0-(checkers[$facet_y_field].categorical_index(facet_y)-0.0)/checkers[$facet_y_field].values.size : 0
      gnuplot_command << "set origin #{xorigin},#{yorigin}\n"
    end
    gnuplot_command << 'plot '
    first = true
    h.each {|(shape_value, color_value), tmp|
      shape = $shape_field ? checkers[$shape_field].categorical_index(shape_value) : nil
      color = $color_field ? checkers[$color_field].categorical_index(color_value) : nil
      gnuplot_command << ",\\\n" if !first
      gnuplot_command << gnuplot_escape_string(tmp.path)
      gnuplot_command << ' using 1:2:3 '
      title = []
      if shape_value
        title << " #{$shape_field}=#{shape_value.to_s}"
      end
      if color_value
        title << " #{$color_field}=#{color_value.to_s}"
      end
      # uniq collapses the two entries when one field serves as both
      # shape and color (--shapecolor).
      title = title.uniq.join(' ')
      gnuplot_command << ' title ' << gnuplot_escape_string(title)
      gnuplot_command << ' with points'
      if shape
        gnuplot_command << " pointtype " << shape.to_s # xxx: some mapping
      else
        gnuplot_command << " pointtype 1"
      end
      if color
        gnuplot_command << " linecolor " << color.to_s # xxx: some mapping
      else
        gnuplot_command << " linecolor 1"
      end
      gnuplot_command << " pointsize variable"
      first = false
    }
    gnuplot_command << "\n"
  }
  if $facet_x_field || $facet_y_field
    gnuplot_command << "unset multiplot\n"
  end
  gnuplot_command << "pause mouse any\n"
  tmp_gnuplot_command = Tempfile.new(['tbplot', '.gp'])
  tmp_gnuplot_command << gnuplot_command
  tmp_gnuplot_command.close
  system('gnuplot', tmp_gnuplot_command.path)
}
@@ -0,0 +1,65 @@
1
+ require "coverage.so"
2
+
3
# Returns a copy of +str+ in which every tab is replaced by the spaces
# needed to reach the next multiple of +tabstop+ columns.
#
# The column is counted from the start of +str+ and restarts after each
# newline, so text following a tab lines up as it would on a terminal.
def expand_tab(str, tabstop=8)
  col = 0
  str.gsub(/(\t+)|[^\t]+/) {
    if $1
      # The first tab of the run pads to the next tab stop; each further
      # tab adds one full tab stop.
      width = $1.length * tabstop - col % tabstop
      col += width
      ' ' * width
    else
      text = $&
      newline_at = text.rindex("\n")
      if newline_at
        col = text.length - newline_at - 1
      else
        col += text.length
      end
      text
    end
  }
end
13
+
14
# At exit, print every line of the lib/tb sources annotated with its
# execution count.  Output goes through "less" when standard output is a
# terminal, and straight to standard output otherwise.
at_exit {
  result = Coverage.result
  paths = result.keys.sort.select {|path| %r{lib/tb[/.]} =~ path }
  unless paths.empty?
    out = STDOUT.tty? ? IO.popen(['less', '-S', '-j20', '+/ 0:'], 'w') : STDOUT

    # Build a pattern of nested optional groups, one per character of
    # the first path, so that matching it against a path consumes the
    # longest leading part that path shares with the first one.
    nested = paths[0].chars.to_a.reverse.inject(nil) {|inner, ch|
      inner ? "(?:#{Regexp.escape(ch)}#{inner})?" : "#{Regexp.escape(ch)}?"
    }
    common = Regexp.compile(nested)
    prefix_len = paths.map {|path| common.match(path).end(0) }.min

    # Cut the shared prefix back to a directory boundary.
    prefix = paths[0][0, prefix_len].sub(%r{[^/]+\z}, '')

    paths.each {|path|
      short_name = path[prefix.length..-1]
      counts = result[path]
      width = counts.compact.max.to_s.length
      counted_fmt = "%s %#{width}d:%s"
      blank_fmt = "%s #{" " * width}:%s"
      File.foreach(path).each_with_index {|line, lineno|
        line = expand_tab(line)
        count = counts[lineno]
        if count
          out.puts counted_fmt % [short_name, count, line]
        else
          # Lines without a count get a blank column.
          out.puts blank_fmt % [short_name, line]
        end
      }
    }
    out.close unless out == STDOUT
  end
}
63
+ Coverage.start
64
+
65
+ load 'test-all.rb'