tb 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/README +156 -5
  2. data/bin/tb +2 -1110
  3. data/lib/tb.rb +4 -2
  4. data/lib/tb/catreader.rb +131 -0
  5. data/lib/tb/cmd_cat.rb +65 -0
  6. data/lib/tb/cmd_consecutive.rb +79 -0
  7. data/lib/tb/cmd_crop.rb +105 -0
  8. data/lib/tb/cmd_cross.rb +119 -0
  9. data/lib/tb/cmd_csv.rb +42 -0
  10. data/lib/tb/cmd_cut.rb +77 -0
  11. data/lib/tb/cmd_grep.rb +76 -0
  12. data/lib/tb/cmd_group.rb +82 -0
  13. data/lib/tb/cmd_gsub.rb +77 -0
  14. data/lib/tb/cmd_help.rb +98 -0
  15. data/lib/tb/cmd_join.rb +81 -0
  16. data/lib/tb/cmd_json.rb +60 -0
  17. data/lib/tb/cmd_ls.rb +273 -0
  18. data/lib/tb/cmd_mheader.rb +77 -0
  19. data/lib/tb/cmd_newfield.rb +59 -0
  20. data/lib/tb/cmd_pnm.rb +43 -0
  21. data/lib/tb/cmd_pp.rb +70 -0
  22. data/lib/tb/cmd_rename.rb +58 -0
  23. data/lib/tb/cmd_shape.rb +67 -0
  24. data/lib/tb/cmd_sort.rb +58 -0
  25. data/lib/tb/cmd_svn_log.rb +158 -0
  26. data/lib/tb/cmd_tsv.rb +43 -0
  27. data/lib/tb/cmd_yaml.rb +47 -0
  28. data/lib/tb/cmdmain.rb +45 -0
  29. data/lib/tb/cmdtop.rb +58 -0
  30. data/lib/tb/cmdutil.rb +327 -0
  31. data/lib/tb/csv.rb +30 -6
  32. data/lib/tb/fieldset.rb +39 -41
  33. data/lib/tb/pager.rb +132 -0
  34. data/lib/tb/pnm.rb +357 -0
  35. data/lib/tb/reader.rb +18 -128
  36. data/lib/tb/record.rb +3 -3
  37. data/lib/tb/ropen.rb +70 -0
  38. data/lib/tb/{pathfinder.rb → search.rb} +69 -34
  39. data/lib/tb/tsv.rb +29 -1
  40. data/sample/colors.ppm +0 -0
  41. data/sample/gradation.pgm +0 -0
  42. data/sample/langs.csv +46 -0
  43. data/sample/tbplot +293 -0
  44. data/test-all-cov.rb +65 -0
  45. data/test-all.rb +5 -0
  46. data/test/test_basic.rb +99 -2
  47. data/test/test_catreader.rb +27 -0
  48. data/test/test_cmd_cat.rb +118 -0
  49. data/test/test_cmd_consecutive.rb +90 -0
  50. data/test/test_cmd_crop.rb +101 -0
  51. data/test/test_cmd_cross.rb +113 -0
  52. data/test/test_cmd_csv.rb +129 -0
  53. data/test/test_cmd_cut.rb +100 -0
  54. data/test/test_cmd_grep.rb +89 -0
  55. data/test/test_cmd_group.rb +181 -0
  56. data/test/test_cmd_gsub.rb +103 -0
  57. data/test/test_cmd_help.rb +190 -0
  58. data/test/test_cmd_join.rb +197 -0
  59. data/test/test_cmd_json.rb +75 -0
  60. data/test/test_cmd_ls.rb +203 -0
  61. data/test/test_cmd_mheader.rb +86 -0
  62. data/test/test_cmd_newfield.rb +63 -0
  63. data/test/test_cmd_pnm.rb +35 -0
  64. data/test/test_cmd_pp.rb +62 -0
  65. data/test/test_cmd_rename.rb +91 -0
  66. data/test/test_cmd_shape.rb +50 -0
  67. data/test/test_cmd_sort.rb +105 -0
  68. data/test/test_cmd_tsv.rb +67 -0
  69. data/test/test_cmd_yaml.rb +55 -0
  70. data/test/test_cmdtty.rb +154 -0
  71. data/test/test_cmdutil.rb +43 -0
  72. data/test/test_csv.rb +10 -0
  73. data/test/test_fieldset.rb +42 -0
  74. data/test/test_pager.rb +142 -0
  75. data/test/test_pnm.rb +374 -0
  76. data/test/test_reader.rb +147 -0
  77. data/test/test_record.rb +49 -0
  78. data/test/test_search.rb +575 -0
  79. data/test/test_tsv.rb +7 -0
  80. metadata +108 -5
  81. data/lib/tb/qtsv.rb +0 -93
@@ -27,6 +27,35 @@
27
27
  require 'stringio'
28
28
 
29
29
  class Tb
30
# Reads the whole file at +filename+ and hands its content to Tb.parse_tsv.
#
# +header_fields+ and the optional block are forwarded unchanged; the
# return value is whatever Tb.parse_tsv returns.
def Tb.load_tsv(filename, *header_fields, &block)
  content = File.read(filename)
  Tb.parse_tsv(content, *header_fields, &block)
end
33
+
34
# Builds a Tb table from TSV text.
#
# tsv           :: TSV input handed to tsv_stream_input.
# header_fields :: optional field names. When given they are used as the
#                  header and every parsed row is treated as data. When
#                  omitted, the rows go through Tb::Reader and the header
#                  is taken from the reader.
#
# If a block is given it receives the array of parsed rows and its result
# replaces that array before the table is built.
#
# Rows shorter than the header are padded with nil before insertion.
# Returns the new Tb object.
def Tb.parse_tsv(tsv, *header_fields)
  rows = []
  tsv_stream_input(tsv) {|fields| rows.push(fields) }
  rows = yield(rows) if block_given?
  if header_fields.empty?
    reader = Tb::Reader.new(rows)
    records = []
    # The header is asked for only after the reader has been iterated.
    reader.each {|fields| records.push(fields) }
    header = reader.header
  else
    header, records = header_fields, rows
  end
  table = Tb.new(header)
  records.each {|fields|
    missing = header.length - fields.length
    missing.times { fields << nil }
    table.insert_values(header, fields)
  }
  table
end
58
+
30
59
  def Tb.tsv_stream_input(tsv)
31
60
  tsvreader = TSVReader.new(tsv)
32
61
  while ary = tsvreader.shift
@@ -53,7 +82,6 @@ class Tb
53
82
  end
54
83
 
55
84
  def close
56
- @input.close
57
85
  end
58
86
  end
59
87
 
Binary file
Binary file
@@ -0,0 +1,46 @@
1
+ language,year
2
+ FORTRAN,1955
3
+ LISP,1958
4
+ COBOL,1959
5
+ ALGOL 58,1958
6
+ APL,1962
7
+ Simula,1962
8
+ SNOBOL,1962
9
+ BASIC,1964
10
+ PL/I,1964
11
+ BCPL,1967
12
+ Logo,1968
13
+ B,1969
14
+ Pascal,1970
15
+ Forth,1970
16
+ C,1972
17
+ Smalltalk,1972
18
+ Prolog,1972
19
+ ML,1973
20
+ Scheme,1975
21
+ SQL,1978
22
+ C++,1980
23
+ Objective-C,1983
24
+ Ada,1983
25
+ Common Lisp,1984
26
+ Eiffel,1985
27
+ Erlang,1986
28
+ Perl,1987
29
+ Tcl,1988
30
+ Haskell,1990
31
+ Python,1991
32
+ Visual Basic,1991
33
+ Ruby,1993
34
+ Lua,1993
35
+ CLOS,1994
36
+ Java,1995
37
+ Delphi,1995
38
+ JavaScript,1995
39
+ PHP,1995
40
+ D,1999
41
+ C#,2001
42
+ F#,2002
43
+ Groovy,2003
44
+ Scala,2003
45
+ Clojure,2007
46
+ Go,2009
@@ -0,0 +1,293 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice, this
9
+ # list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
+ # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
+ # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
+ # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
+ # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
+ # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
+ # OF SUCH DAMAGE.
26
+
27
+ # plot a graph using gnuplot.
28
+
29
+ # usage:
30
+ # tbplot [-x field] [-y field] [--shape field] [--color field] [--size field] [--facet-x field] [--facet-y field] filename
31
+
32
+ $:.unshift '/home/akr/ruby/tb/lib'
33
+
34
+ require 'optparse'
35
+ require 'tempfile'
36
+ require 'time'
37
+ require 'tb'
38
+
39
# Reports +msg+ on standard error and terminates the script with a
# failure exit status.
def err(msg)
  STDERR.puts(msg)
  exit(false)
end
43
+
44
# Returns +string+ as a double-quoted gnuplot string literal.
#
# The string is handled byte-wise (when the Ruby in use supports
# encodings) and every byte that is not an ASCII letter is written as a
# three-digit backslash octal escape.
def gnuplot_escape_string(string)
  raw = string
  raw = raw.dup.force_encoding("ascii-8bit") if raw.respond_to?(:force_encoding)
  escaped = raw.gsub(/[^A-Za-z]/) {|ch| "\\%03o" % ch.ord }
  '"' + escaped + '"'
end
48
+
49
# Collects statistics about the values seen in one field: how many look
# numeric, how many look like timestamps, their minimum/maximum, and a
# 1-based index for every distinct value (used for categorical mapping).
class ValueChecker
  # A string holding a decimal number with optional fraction and exponent.
  NUMERIC_PATTERN = /\A\s*-?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?\s*\z/
  # A string holding a plain (optionally negative) integer.
  INTEGER_PATTERN = /\A\s*-?\d+\s*\z/
  # ISO 8601 style timestamps in extended form (2011-01-02T03:04:05Z).
  TIME_EXTENDED_PATTERN = /\A\s*-?\d+-\d\d(-\d\d(T\d\d(:\d\d(:\d\d(\.\d+)?(Z|[+-]\d\d:?\d\d)?)?)?)?)?\s*\z/
  # ISO 8601 style timestamps in basic form (20110102T030405Z).
  TIME_BASIC_PATTERN = /\A\s*-?\d+\d\d\d\dT\d\d(\d\d(\d\d(\.\d+)?(Z|[+-]\d\d:?\d\d)?)?)?\s*\z/

  def initialize
    @total = 0 # NOTE(review): never updated anywhere in this class.
    @numeric = 0
    @numeric_min = @numeric_max = nil
    @time = 0
    @time_min = @time_max = nil
    @values = {}
  end
  attr_reader :total, :numeric, :time
  attr_reader :numeric_min, :numeric_max
  attr_reader :time_min, :time_max
  attr_reader :values

  # Returns the 1-based index assigned to +val+ by #check.
  # Raises KeyError (IndexError on old Rubies) if +val+ was never checked.
  def categorical_index(val)
    @values.fetch(val)
  end

  # Records +val+ and returns the representation to be written to the
  # gnuplot data file.
  #
  # * Numeric objects and numeric-looking strings update the numeric
  #   counter and numeric_min / numeric_max.  The bounds are always kept
  #   as Integer/Float so that they compare numerically ("9" < "10") and
  #   can be used in arithmetic; the returned value is still +val+ itself.
  # * Timestamp-looking strings update the time counter and
  #   time_min / time_max; the returned value is the ISO 8601 form of the
  #   parsed (and timezone-tweaked) time.
  # * Anything else is returned unchanged.
  #
  # Every returned value is also registered for #categorical_index.
  def check(val)
    ret = val
    if val.kind_of? Numeric
      record_numeric(val)
    elsif NUMERIC_PATTERN =~ val
      text = val.to_s
      record_numeric(INTEGER_PATTERN =~ text ? text.to_i : text.to_f)
    elsif TIME_EXTENDED_PATTERN =~ val || TIME_BASIC_PATTERN =~ val
      @time += 1
      t = Time.parse(val).utc
      t = tweak_timezone(t)
      if @time == 1
        @time_min = @time_max = t
      else
        @time_min = t if t < @time_min
        @time_max = t if @time_max < t
      end
      ret = t.iso8601
    end
    @values[ret] ||= @values.size + 1
    ret
  end

  def tweak_timezone(t)
    # gnuplot supports UTC only.
    # So this (not-so-valid) tweak is required.
    unless defined? @time_offset
      @time_offset = Time.now.utc_offset
    end
    t + @time_offset
  end

  # Returns the gnuplot tic format for this field: "%H:%M" when all time
  # values fall on one calendar day, a two-line date format when they
  # span several days, and "% g" when no time value was seen.
  def tic_format
    if 0 < @time
      if @time_min.year == @time_max.year &&
         @time_min.mon == @time_max.mon &&
         @time_min.day == @time_max.day
        "%H:%M"
      else
        "%Y\n%m-%d"
      end
    else
      "% g"
    end
  end

  private

  # Counts one numeric value and widens numeric_min / numeric_max.
  def record_numeric(num)
    @numeric += 1
    if @numeric == 1
      @numeric_min = @numeric_max = num
    else
      @numeric_min = num if num < @numeric_min
      @numeric_max = num if @numeric_max < num
    end
  end
end
125
+
126
# Plot settings chosen on the command line; nil means "not given".
$x_field = $y_field = nil
$shape_field = $color_field = $size_field = nil
$facet_x_field = $facet_y_field = nil
$max_size = nil

op = OptionParser.new
op.on('-x FIELD', 'x-field') {|name| $x_field = name }
op.on('-y FIELD', 'y-field') {|name| $y_field = name }
op.on('--shape=FIELD', 'shape-field') {|name| $shape_field = name }
op.on('--color=FIELD', 'color-field') {|name| $color_field = name }
op.on('--size=FIELD', 'size-field') {|name| $size_field = name }
op.on('--facet-x=FIELD', 'facet-x-field') {|name| $facet_x_field = name }
op.on('--facet-y=FIELD', 'facet-y-field') {|name| $facet_y_field = name }
op.on('--shapecolor=FIELD', 'shape-field and color-field') {|name|
  $shape_field = name
  $color_field = name
}
op.on('--max-size=MAX-SIZE', 'maximum point size') {|value| $max_size = value.to_f }

op.parse!(ARGV)

# Without --max-size, the default depends on whether a size field is used.
$max_size ||= $size_field ? 100.0 : 1.0
155
+
156
# Temporary data files, keyed first by facet ([facet_x, facet_y]) and then
# by series ([shape, color]).
tmps = {}

argv = ARGV.empty? ? ['-'] : ARGV
Tb::CatReader.open(argv) {|r|
  # Fields named explicitly on the command line must exist in the input.
  given_fields = [$x_field, $y_field, $shape_field, $color_field, $size_field, $facet_x_field, $facet_y_field].compact
  header = r.header
  given_fields.each {|f|
    err("field not found: #{f.inspect}") if !header.include?(f)
  }

  # x and y default to the first fields not already used for another role.
  header = header - given_fields
  if !$x_field
    err("x-field not found") if header.empty?
    $x_field = header.shift
  end
  if !$y_field
    err("y-field not found") if header.empty?
    $y_field = header.shift
  end

  checkers = {}
  uniq_fields = [$x_field, $y_field, $shape_field, $color_field, $size_field, $facet_x_field, $facet_y_field].compact.uniq
  r.each {|ary|
    # Rows lacking any of the plotted fields are ignored.
    next if uniq_fields.any? {|f| ary[r.index_from_field(f)].nil? }
    vs = {}
    uniq_fields.each {|f|
      checkers[f] ||= ValueChecker.new
      vs[f] = checkers[f].check(ary[r.index_from_field(f)])
    }
    x = vs[$x_field]
    y = vs[$y_field]
    shape = $shape_field ? vs[$shape_field] : nil
    color = $color_field ? vs[$color_field] : nil
    size = $size_field ? vs[$size_field] : 1
    facet_x = $facet_x_field ? vs[$facet_x_field] : nil
    facet_y = $facet_y_field ? vs[$facet_y_field] : nil
    key1 = [facet_x, facet_y]
    key2 = [shape, color]
    tmps[key1] ||= {}
    tmps[key1][key2] ||= Tempfile.new('tbplot')
    tmps[key1][key2].puts "#{x} #{y} #{size}"
  }
  tmps.each {|k1, h| h.each {|k2, v| v.close } }

  # Without any usable row there is no checker for the x/y fields and
  # nothing to plot.
  err("no data to plot") if tmps.empty?

  gnuplot_command = ''
  if 0 < checkers[$x_field].time || 0 < checkers[$y_field].time
    gnuplot_command << 'set timefmt "%Y-%m-%dT%H:%M:%SZ"' << "\n"
  end
  if 0 < checkers[$x_field].time
    gnuplot_command << "set xdata time\n"
    gnuplot_command << "set format x #{gnuplot_escape_string(checkers[$x_field].tic_format)}\n"
  end
  if 0 < checkers[$y_field].time
    gnuplot_command << "set ydata time\n"
    gnuplot_command << "set format y #{gnuplot_escape_string(checkers[$y_field].tic_format)}\n"
  end
  gnuplot_command << "set xlabel #{gnuplot_escape_string($x_field)}\n"
  gnuplot_command << "set ylabel #{gnuplot_escape_string($y_field)}\n"
  if $size_field
    # numeric_max is nil when the size field holds no numeric value and may
    # be a numeric string; to_f turns all of these into a Float.
    size_max = checkers[$size_field].numeric_max.to_f
    if !(0 < size_max)
      err("size-field has no positive numeric value: #{$size_field.inspect}")
    end
    pointsize = $max_size / size_max
    gnuplot_command << "set pointsize #{pointsize}\n"
  end
  if $facet_x_field || $facet_y_field
    xsize = $facet_x_field ? 1.0 / checkers[$facet_x_field].values.size : 1.0
    ysize = $facet_y_field ? 1.0 / checkers[$facet_y_field].values.size : 1.0
    gnuplot_command << "set multiplot\n"
    gnuplot_command << "set size #{xsize},#{ysize}\n"
  end
  tmps.each {|(facet_x, facet_y), h|
    if $facet_x_field || $facet_y_field
      # Facets are laid out left to right (x) and top to bottom (y).
      xorigin = $facet_x_field ? (checkers[$facet_x_field].categorical_index(facet_x)-1.0)/checkers[$facet_x_field].values.size : 0
      yorigin = $facet_y_field ? 1.0-(checkers[$facet_y_field].categorical_index(facet_y)-0.0)/checkers[$facet_y_field].values.size : 0
      gnuplot_command << "set origin #{xorigin},#{yorigin}\n"
    end
    gnuplot_command << 'plot '
    first = true
    h.each {|(shape_value, color_value), tmp|
      shape = $shape_field ? checkers[$shape_field].categorical_index(shape_value) : nil
      color = $color_field ? checkers[$color_field].categorical_index(color_value) : nil
      gnuplot_command << ",\\\n" if !first
      gnuplot_command << gnuplot_escape_string(tmp.path)
      gnuplot_command << ' using 1:2:3 '
      title = []
      if shape_value
        title << " #{$shape_field}=#{shape_value.to_s}"
      end
      if color_value
        title << " #{$color_field}=#{color_value.to_s}"
      end
      # uniq collapses the two entries when --shapecolor names one field.
      title = title.uniq.join(' ')
      gnuplot_command << ' title ' << gnuplot_escape_string(title)
      gnuplot_command << ' with points'
      if shape
        gnuplot_command << " pointtype " << shape.to_s # xxx: some mapping
      else
        gnuplot_command << " pointtype 1"
      end
      if color
        gnuplot_command << " linecolor " << color.to_s # xxx: some mapping
      else
        gnuplot_command << " linecolor 1"
      end
      gnuplot_command << " pointsize variable"
      first = false
    }
    gnuplot_command << "\n"
  }
  if $facet_x_field || $facet_y_field
    gnuplot_command << "unset multiplot\n"
  end
  gnuplot_command << "pause mouse any\n"
  tmp_gnuplot_command = Tempfile.new(['tbplot', '.gp'])
  tmp_gnuplot_command << gnuplot_command
  tmp_gnuplot_command.close
  # system returns nil when the command could not be executed at all.
  if system('gnuplot', tmp_gnuplot_command.path).nil?
    err("cannot execute gnuplot")
  end
}
@@ -0,0 +1,65 @@
1
+ require "coverage.so"
2
+
3
# Returns +str+ with every tab replaced by the spaces needed to reach the
# next tab stop.
#
# str     :: text to expand; may contain newlines, each of which restarts
#            the column count.
# tabstop :: distance between tab stops in characters (default 8).
#
# Columns are counted in characters, so wide characters are not given
# extra width.
def expand_tab(str, tabstop=8)
  col = 0
  str.gsub(/(\t+)|[^\t]+/) {
    tabs = $1
    if tabs
      # The first tab fills up to the next stop; each further tab adds a
      # full tabstop.
      width = tabs.length * tabstop - col % tabstop
      col += width
      ' ' * width
    else
      text = $&
      last_newline = text.rindex("\n")
      col = last_newline ? text.length - last_newline - 1 : col + text.length
      text
    end
  }
end
13
+
14
# When the process exits, print a per-line execution count listing for
# every measured file whose path matches lib/tb/ or lib/tb. .
# The listing is piped through less when standard output is a terminal.
at_exit {
  result = Coverage.result
  targets = result.keys.sort.select {|path| %r{lib/tb[/.]} =~ path }
  unless targets.empty?
    out = STDOUT.tty? ? IO.popen(['less', '-S', '-j20', '+/ 0:'], 'w') : STDOUT

    # Build a regexp of nested optional groups from the first path; matching
    # it against another path consumes exactly their common leading part.
    pattern_src = nil
    targets[0].chars.to_a.reverse_each {|ch|
      escaped = Regexp.escape(ch)
      pattern_src = pattern_src ? "(?:#{escaped}#{pattern_src})?" : "#{escaped}?"
    }
    prefix_pat = Regexp.compile(pattern_src)
    prefix_len = targets.map {|path| prefix_pat.match(path).end(0) }.min
    # Cut the shared prefix back to the last directory separator.
    prefix = targets[0][0, prefix_len].sub(%r{[^/]+\z}, '')

    targets.each {|path|
      next unless %r{lib/tb[/.]} =~ path
      short_name = path[prefix.length..-1]
      counts = result[path]
      width = counts.compact.max.to_s.length
      counted_fmt = "%s %#{width}d:%s"
      blank_fmt = "%s #{" " * width}:%s"
      File.foreach(path).with_index {|line, i|
        line = expand_tab(line)
        if counts[i]
          out.puts counted_fmt % [short_name, counts[i], line]
        else
          out.puts blank_fmt % [short_name, line]
        end
      }
    }
    out.close unless out == STDOUT
  end
}
Coverage.start

load 'test-all.rb'