tb 0.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. data/README +62 -50
  2. data/bin/tb +22 -18
  3. data/lib/tb.rb +35 -19
  4. data/lib/tb/basic.rb +85 -86
  5. data/lib/tb/catreader.rb +33 -116
  6. data/lib/tb/cmd_cat.rb +31 -27
  7. data/lib/tb/cmd_consecutive.rb +45 -35
  8. data/lib/tb/cmd_crop.rb +86 -52
  9. data/lib/tb/cmd_cross.rb +113 -71
  10. data/lib/tb/cmd_cut.rb +49 -44
  11. data/lib/tb/cmd_git_log.rb +193 -0
  12. data/lib/tb/cmd_grep.rb +43 -32
  13. data/lib/tb/cmd_group.rb +63 -39
  14. data/lib/tb/cmd_gsub.rb +53 -43
  15. data/lib/tb/cmd_help.rb +51 -24
  16. data/lib/tb/cmd_join.rb +32 -35
  17. data/lib/tb/cmd_ls.rb +233 -205
  18. data/lib/tb/cmd_mheader.rb +47 -37
  19. data/lib/tb/cmd_nest.rb +94 -0
  20. data/lib/tb/cmd_newfield.rb +29 -33
  21. data/lib/tb/cmd_rename.rb +40 -32
  22. data/lib/tb/cmd_shape.rb +31 -24
  23. data/lib/tb/cmd_sort.rb +46 -25
  24. data/lib/tb/cmd_svn_log.rb +47 -28
  25. data/lib/tb/cmd_tar_tvf.rb +447 -0
  26. data/lib/tb/cmd_to_csv.rb +60 -0
  27. data/lib/tb/cmd_to_json.rb +60 -0
  28. data/lib/tb/cmd_to_pnm.rb +48 -0
  29. data/lib/tb/cmd_to_pp.rb +71 -0
  30. data/lib/tb/cmd_to_tsv.rb +48 -0
  31. data/lib/tb/cmd_to_yaml.rb +52 -0
  32. data/lib/tb/cmd_unnest.rb +118 -0
  33. data/lib/tb/cmdmain.rb +24 -20
  34. data/lib/tb/cmdtop.rb +33 -25
  35. data/lib/tb/cmdutil.rb +26 -66
  36. data/lib/tb/csv.rb +46 -34
  37. data/lib/tb/enum.rb +294 -0
  38. data/lib/tb/enumerable.rb +198 -7
  39. data/lib/tb/enumerator.rb +73 -0
  40. data/lib/tb/fieldset.rb +27 -19
  41. data/lib/tb/fileenumerator.rb +365 -0
  42. data/lib/tb/json.rb +50 -0
  43. data/lib/tb/pager.rb +6 -6
  44. data/lib/tb/pairs.rb +227 -0
  45. data/lib/tb/pnm.rb +23 -22
  46. data/lib/tb/reader.rb +52 -49
  47. data/lib/tb/record.rb +48 -19
  48. data/lib/tb/revcmp.rb +38 -0
  49. data/lib/tb/ropen.rb +74 -57
  50. data/lib/tb/search.rb +25 -21
  51. data/lib/tb/tsv.rb +31 -34
  52. data/sample/excel2csv +24 -20
  53. data/sample/poi-xls2csv.rb +24 -20
  54. data/sample/poi-xls2csv.sh +22 -18
  55. data/sample/tbplot +185 -127
  56. data/test-all-cov.rb +3 -3
  57. data/test-all.rb +1 -1
  58. data/test/test_basic.rb +26 -10
  59. data/test/test_catreader.rb +7 -6
  60. data/test/test_cmd_cat.rb +32 -0
  61. data/test/test_cmd_consecutive.rb +10 -0
  62. data/test/test_cmd_crop.rb +4 -4
  63. data/test/test_cmd_cross.rb +16 -4
  64. data/test/test_cmd_git_log.rb +46 -0
  65. data/test/test_cmd_help.rb +17 -12
  66. data/test/test_cmd_join.rb +21 -1
  67. data/test/test_cmd_ls.rb +3 -4
  68. data/test/test_cmd_mheader.rb +17 -11
  69. data/test/test_cmd_nest.rb +49 -0
  70. data/test/test_cmd_sort.rb +15 -0
  71. data/test/test_cmd_tar_tvf.rb +281 -0
  72. data/test/{test_cmd_csv.rb → test_cmd_to_csv.rb} +35 -21
  73. data/test/{test_cmd_json.rb → test_cmd_to_json.rb} +31 -3
  74. data/test/{test_cmd_pnm.rb → test_cmd_to_pnm.rb} +2 -2
  75. data/test/{test_cmd_pp.rb → test_cmd_to_pp.rb} +4 -4
  76. data/test/{test_cmd_tsv.rb → test_cmd_to_tsv.rb} +4 -4
  77. data/test/{test_cmd_yaml.rb → test_cmd_to_yaml.rb} +3 -3
  78. data/test/test_cmd_unnest.rb +89 -0
  79. data/test/test_cmdtty.rb +19 -13
  80. data/test/test_enumerable.rb +83 -1
  81. data/test/test_fileenumerator.rb +265 -0
  82. data/test/test_json.rb +15 -0
  83. data/test/test_pager.rb +3 -4
  84. data/test/test_pairs.rb +122 -0
  85. data/test/test_pnm.rb +24 -24
  86. data/test/test_reader.rb +35 -13
  87. data/test/test_revcmp.rb +10 -0
  88. data/test/test_tbenum.rb +173 -0
  89. metadata +51 -23
  90. data/lib/tb/cmd_csv.rb +0 -42
  91. data/lib/tb/cmd_json.rb +0 -60
  92. data/lib/tb/cmd_pnm.rb +0 -43
  93. data/lib/tb/cmd_pp.rb +0 -70
  94. data/lib/tb/cmd_tsv.rb +0 -43
  95. data/lib/tb/cmd_yaml.rb +0 -47
data/lib/tb/enumerable.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # lib/tb/enumerable.rb - extensions for Enumerable
2
2
  #
3
- # Copyright (C) 2010 Tanaka Akira <akr@fsij.org>
3
+ # Copyright (C) 2010-2012 Tanaka Akira <akr@fsij.org>
4
4
  #
5
5
  # Redistribution and use in source and binary forms, with or without
6
6
  # modification, are permitted provided that the following conditions are met:
@@ -117,13 +117,13 @@ module Enumerable
117
117
  end
118
118
  value_selector = tb_cat_selector_proc(args.pop)
119
119
  key_selectors = args.map {|a| tb_cat_selector_proc(a) }
120
- has_seed = opts.include? :seed
120
+ has_seed = opts.has_key? :seed
121
121
  seed_value = opts[:seed]
122
- if opts.include?(:update) && opts.include?(:op)
122
+ if opts.has_key?(:update) && opts.has_key?(:op)
123
123
  raise ArgumentError, "both :op and :update option specified"
124
- elsif opts.include? :update
124
+ elsif opts.has_key? :update
125
125
  update_proc = opts[:update].to_proc
126
- elsif opts.include? :op
126
+ elsif opts.has_key? :op
127
127
  op_proc = opts[:op].to_proc
128
128
  update_proc = lambda {|ks, s, v| op_proc.call(s, v) }
129
129
  else
@@ -139,11 +139,11 @@ module Enumerable
139
139
  h = result
140
140
  0.upto(ks.length-2) {|i|
141
141
  k = ks[i]
142
- h[k] = {} if !h.include?(k)
142
+ h[k] = {} if !h.has_key?(k)
143
143
  h = h[k]
144
144
  }
145
145
  lastk = ks.last
146
- if !h.include?(lastk)
146
+ if !h.has_key?(lastk)
147
147
  if has_seed
148
148
  h[lastk] = update_proc.call(ks, seed_value, v)
149
149
  else
@@ -281,4 +281,195 @@ module Enumerable
281
281
  tb_categorize(*(args + [lambda {|e| 1 }, {:update => lambda {|ks, s, v| s + v }}]))
282
282
  end
283
283
 
284
+ def dump_objsfile(title, tempfile)
285
+ tempfile.flush
286
+ path = tempfile
287
+ a = []
288
+ open(path) {|f|
289
+ until f.eof?
290
+ pair = Marshal.load(f)
291
+ a << (pair ? pair.last : :sep)
292
+ end
293
+ }
294
+ puts "#{title}: #{a.inspect}"
295
+ end
296
+ private :dump_objsfile
297
+
298
+ def extsort_by(opts={}, &cmpvalue_from)
299
+ memsize = opts[:memsize] || 10000000
300
+ Enumerator.new {|y|
301
+ extsort_by_internal(memsize, cmpvalue_from, y)
302
+ }
303
+ end
304
+
305
+ def extsort_by_internal(memsize, cmpvalue_from, y)
306
+ tmp1 = Tempfile.new("tbsortA")
307
+ tmp2 = Tempfile.new("tbsortB")
308
+ extsort_by_first_split(tmp1, tmp2, cmpvalue_from, memsize)
309
+ if tmp1.size == 0 && tmp2.size == 0
310
+ return Enumerator.new {|_| }
311
+ end
312
+ tmp3 = Tempfile.new("tbsortC")
313
+ tmp4 = Tempfile.new("tbsortD")
314
+ while tmp2.size != 0
315
+ #dump_objsfile(:tmp1, tmp1)
316
+ #dump_objsfile(:tmp2, tmp2)
317
+ #dump_objsfile(:tmp3, tmp3)
318
+ #dump_objsfile(:tmp4, tmp4)
319
+ extsort_by_merge(tmp1, tmp2, tmp3, tmp4)
320
+ tmp1.rewind
321
+ tmp1.truncate(0)
322
+ tmp2.rewind
323
+ tmp2.truncate(0)
324
+ tmp1, tmp2, tmp3, tmp4 = tmp3, tmp4, tmp1, tmp2
325
+ end
326
+ #dump_objsfile(:tmp1, tmp1)
327
+ #dump_objsfile(:tmp2, tmp2)
328
+ #dump_objsfile(:tmp3, tmp3)
329
+ #dump_objsfile(:tmp4, tmp4)
330
+ extsort_by_strip_cv(tmp1, y)
331
+ ensure
332
+ tmp1.close(true) if tmp1
333
+ tmp2.close(true) if tmp2
334
+ tmp3.close(true) if tmp3
335
+ tmp4.close(true) if tmp4
336
+ end
337
+ private :extsort_by_internal
338
+
339
+ def extsort_by_first_split(tmp1, tmp2, cmpvalue_from, memsize)
340
+ prevobj_cv = nil
341
+ tmp_current, tmp_another = tmp1, tmp2
342
+ buf = []
343
+ buf_size = 0
344
+ buf_mode = true
345
+ self.each_with_index {|obj, i|
346
+ obj_cv = cmpvalue_from.call(obj)
347
+ #p [obj, obj_cv]
348
+ #p [prevobj_cv, buf_mode, obj, obj_cv]
349
+ if buf_mode
350
+ dumped = Marshal.dump([obj_cv, obj])
351
+ buf << [obj_cv, i, dumped]
352
+ buf_size += dumped.size
353
+ if memsize < buf_size
354
+ buf.sort!
355
+ buf.each {|_, _, d|
356
+ tmp_current.write d
357
+ }
358
+ prevobj_cv, = buf.last
359
+ buf.clear
360
+ buf_mode = false
361
+ end
362
+ elsif prevobj_cv <= obj_cv
363
+ Marshal.dump([obj_cv, obj], tmp_current)
364
+ prevobj_cv = obj_cv
365
+ else
366
+ dumped = Marshal.dump([obj_cv, obj])
367
+ Marshal.dump(nil, tmp_current)
368
+ buf = [[obj_cv, i, dumped]]
369
+ buf_size = dumped.size
370
+ buf_mode = true
371
+ tmp_current, tmp_another = tmp_another, tmp_current
372
+ end
373
+ }
374
+ if buf_mode
375
+ buf.sort!
376
+ buf.each {|_, _, d|
377
+ tmp_current.write d
378
+ }
379
+ end
380
+ if !buf_mode || !buf.empty?
381
+ Marshal.dump(nil, tmp_current)
382
+ end
383
+ end
384
+ private :extsort_by_first_split
385
+
386
+ def extsort_by_merge(src1, src2, dst1, dst2)
387
+ src1.rewind
388
+ src2.rewind
389
+ obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
390
+ obj2_cv, obj2 = obj2_pair = Marshal.load(src2)
391
+ prefer1 = true
392
+ while true
393
+ cmp = obj1_cv <=> obj2_cv
394
+ if prefer1 ? cmp > 0 : cmp >= 0
395
+ obj1_pair, obj1_cv, obj1, src1, obj2_pair, obj2_cv, obj2, src2 = obj2_pair, obj2_cv, obj2, src2, obj1_pair, obj1_cv, obj1, src1
396
+ prefer1 = !prefer1
397
+ end
398
+ Marshal.dump([obj1_cv, obj1], dst1)
399
+ obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
400
+ if !obj1_pair
401
+ begin
402
+ Marshal.dump(obj2_pair, dst1)
403
+ obj2_pair = Marshal.load(src2)
404
+ end until !obj2_pair
405
+ Marshal.dump(nil, dst1)
406
+ dst1, dst2 = dst2, dst1
407
+ break if src1.eof?
408
+ break if src2.eof?
409
+ obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
410
+ obj2_cv, obj2 = obj2_pair = Marshal.load(src2)
411
+ end
412
+ end
413
+ if !src1.eof?
414
+ restsrc = src1
415
+ elsif !src2.eof?
416
+ restsrc = src2
417
+ else
418
+ return
419
+ end
420
+ until restsrc.eof?
421
+ restobj_pair = Marshal.load(restsrc)
422
+ Marshal.dump(restobj_pair, dst1)
423
+ end
424
+ end
425
+ private :extsort_by_merge
426
+
427
+ def extsort_by_strip_cv(tmp1, y)
428
+ tmp1.rewind
429
+ while true
430
+ pair = Marshal.load(tmp1)
431
+ break if !pair
432
+ _, obj = pair
433
+ y.yield obj
434
+ end
435
+ end
436
+ private :extsort_by_strip_cv
437
+
438
+ # splits self by _boundary_p_ which is called with two adjacent elements.
439
+ #
440
+ # _before_group_ is called before each group with the first element.
441
+ # _after_group_ is called after each group with the last element.
442
+ # _body_ is called for each element.
443
+ #
444
+ def each_group_element(boundary_p, before_group, body, after_group)
445
+ prev = nil
446
+ first = true
447
+ self.each {|curr|
448
+ if first
449
+ before_group.call(curr)
450
+ body.call(curr)
451
+ prev = curr
452
+ first = false
453
+ elsif boundary_p.call(prev, curr)
454
+ after_group.call(prev)
455
+ before_group.call(curr)
456
+ body.call(curr)
457
+ prev = curr
458
+ else
459
+ body.call(curr)
460
+ prev = curr
461
+ end
462
+ }
463
+ if !first
464
+ after_group.call(prev)
465
+ end
466
+ end
467
+
468
+ def lazy_map
469
+ Enumerator.new {|y|
470
+ self.each {|*vs|
471
+ y.yield(yield(*vs))
472
+ }
473
+ }
474
+ end
284
475
  end
@@ -0,0 +1,73 @@
1
+ # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ class Tb::Yielder
30
+ def initialize(header_proc, each_proc)
31
+ @header_proc_called = false
32
+ @header_proc = header_proc
33
+ @each_proc = each_proc
34
+ end
35
+ attr_reader :header_proc_called
36
+
37
+ def set_header(header)
38
+ raise ArgumentError, "set_header called twice" if @header_proc_called
39
+ @header_proc_called = true
40
+ @header_proc.call(header) if @header_proc
41
+ end
42
+
43
+ def yield(*args)
44
+ if !@header_proc_called
45
+ set_header(nil)
46
+ end
47
+ @each_proc.call(*args)
48
+ end
49
+ alias << yield
50
+ end
51
+
52
+ class Tb::Enumerator
53
+ include Tb::Enum
54
+
55
+ def initialize(&enumerator_proc)
56
+ @enumerator_proc = enumerator_proc
57
+ end
58
+
59
+ def each(&each_proc)
60
+ yielder = Tb::Yielder.new(nil, each_proc)
61
+ @enumerator_proc.call(yielder)
62
+ nil
63
+ end
64
+
65
+ def header_and_each(header_proc, &each_proc)
66
+ yielder = Tb::Yielder.new(header_proc, each_proc)
67
+ @enumerator_proc.call(yielder)
68
+ if !yielder.header_proc_called
69
+ header_proc.call(nil)
70
+ end
71
+ nil
72
+ end
73
+ end
data/lib/tb/fieldset.rb CHANGED
@@ -1,30 +1,38 @@
1
1
  # lib/tb/fieldset.rb - Tb::FieldSet class
2
2
  #
3
- # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
3
+ # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
4
4
  #
5
5
  # Redistribution and use in source and binary forms, with or without
6
- # modification, are permitted provided that the following conditions are met:
6
+ # modification, are permitted provided that the following conditions
7
+ # are met:
7
8
  #
8
- # 1. Redistributions of source code must retain the above copyright notice, this
9
- # list of conditions and the following disclaimer.
10
- # 2. Redistributions in binary form must reproduce the above copyright notice,
11
- # this list of conditions and the following disclaimer in the documentation
12
- # and/or other materials provided with the distribution.
13
- # 3. The name of the author may not be used to endorse or promote products
14
- # derived from this software without specific prior written permission.
9
+ # 1. Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials provided
14
+ # with the distribution.
15
+ # 3. The name of the author may not be used to endorse or promote
16
+ # products derived from this software without specific prior
17
+ # written permission.
15
18
  #
16
- # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
- # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
- # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
- # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
- # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
- # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
- # OF SUCH DAMAGE.
19
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
23
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
25
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
30
 
27
31
  class Tb::FieldSet
32
+ def self.normalize(header)
33
+ Tb::FieldSet.new(*header).header
34
+ end
35
+
28
36
  def initialize(*fs)
29
37
  @header = []
30
38
  @field2index = {}
@@ -0,0 +1,365 @@
1
+ # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ module Enumerable
30
+ # creates a Tb::FileEnumerator object.
31
+ #
32
+ def to_fileenumerator
33
+ Tb::FileEnumerator.new_tempfile {|gen|
34
+ self.each {|*objs|
35
+ gen.call(*objs)
36
+ }
37
+ }
38
+ end
39
+ end
40
+
41
+ # Tb::FileEnumerator is an enumerator backed by a temporary file.
42
+ #
43
+ # An instance of Tb::FileEnumerator can be used just once,
44
+ # except if Tb::FileEnumerator#use is explicitly used.
45
+ #
46
+ # After use, the temporary file is removed.
47
+ # If the object is not used, the temporary file is removed by GC,
48
+ # as with an ordinary Tempfile object.
49
+ #
50
+ class Tb::FileEnumerator
51
+ include Enumerable
52
+
53
+ class Builder
54
+ def initialize(klass)
55
+ @klass = klass
56
+ @tempfile = Tempfile.new("tb")
57
+ @tempfile.binmode
58
+ end
59
+
60
+ def gen(*objs)
61
+ Marshal.dump(objs, @tempfile)
62
+ end
63
+
64
+ def new
65
+ @tempfile.close
66
+ @klass.new(
67
+ lambda { open(@tempfile.path, "rb") },
68
+ lambda { @tempfile.close(true) })
69
+ end
70
+ end
71
+
72
+ def self.builder
73
+ Builder.new(Tb::FileEnumerator)
74
+ end
75
+
76
+ def self.new_tempfile
77
+ gen, new = self.gen_new
78
+ yield gen
79
+ new.call
80
+ end
81
+
82
+ def self.gen_new
83
+ builder = self.builder
84
+ return builder.method(:gen), builder.method(:new)
85
+ end
86
+
87
+ def initialize(open_func, remove_func)
88
+ @use_count = 0
89
+ @open_func = open_func
90
+ @remove_func = remove_func
91
+ end
92
+
93
+ def use_count_up
94
+ @use_count += 1
95
+ end
96
+ private :use_count_up
97
+
98
+ def use_count_down
99
+ @use_count -= 1
100
+ if @use_count == 0
101
+ @remove_func.call
102
+ @open_func = @remove_func = nil
103
+ end
104
+ end
105
+ private :use_count_down
106
+
107
+ # delay removing the tempfile until the given block is finished.
108
+ def use
109
+ if !@open_func
110
+ raise ArgumentError, "FileEnumerator reused."
111
+ end
112
+ use_count_up
113
+ begin
114
+ yield
115
+ ensure
116
+ use_count_down
117
+ end
118
+ end
119
+
120
+ def each
121
+ if block_given?
122
+ self.use {
123
+ begin
124
+ io = @open_func.call
125
+ while true
126
+ objs = Marshal.load(io)
127
+ yield(*objs)
128
+ end
129
+ rescue EOFError
130
+ ensure
131
+ io.close
132
+ end
133
+ }
134
+ else
135
+ Reader.new(self)
136
+ end
137
+ end
138
+
139
+ def open_reader
140
+ reader = Reader.new(self)
141
+ reader.use {
142
+ yield reader
143
+ }
144
+ end
145
+
146
+ class Reader
147
+ def initialize(fileenumerator)
148
+ @use_count = 0
149
+ @fileenumerator = fileenumerator
150
+ @fileenumerator.send(:use_count_up)
151
+ @io = @fileenumerator.instance_eval { @open_func.call }
152
+ peek_reset
153
+ end
154
+
155
+ def closed?
156
+ @io.nil?
157
+ end
158
+
159
+ def finalize
160
+ @io.close
161
+ @io = nil
162
+ @fileenumerator.send(:use_count_down)
163
+ peek_reset
164
+ end
165
+ private :finalize
166
+
167
+ def use_begin
168
+ @use_count += 1
169
+ end
170
+
171
+ def use_end
172
+ @use_count -= 1
173
+ if @use_count == 0
174
+ finalize
175
+ end
176
+ end
177
+
178
+ def use
179
+ use_begin
180
+ begin
181
+ yield
182
+ ensure
183
+ use_end
184
+ end
185
+ end
186
+
187
+ def peek_reset
188
+ @peeked = false
189
+ @peeked_objs = nil
190
+ if @io
191
+ @pos = @io.pos
192
+ else
193
+ @pos = nil
194
+ end
195
+ end
196
+ private :peek_reset
197
+
198
+ def pos
199
+ @pos
200
+ end
201
+
202
+ def pos=(val)
203
+ @io.seek(val)
204
+ peek_reset
205
+ nil
206
+ end
207
+
208
+ def peek_values
209
+ if !@io
210
+ raise StopIteration
211
+ end
212
+ if !@peeked
213
+ begin
214
+ objs = Marshal.load(@io)
215
+ rescue EOFError
216
+ if @use_count == 0
217
+ finalize
218
+ end
219
+ raise StopIteration
220
+ end
221
+ @peeked = true
222
+ @peeked_objs = objs
223
+ end
224
+ @peeked_objs
225
+ end
226
+
227
+ def peek
228
+ result = self.peek_values
229
+ if result.kind_of?(Array) && result.length == 1
230
+ result = result[0]
231
+ end
232
+ result
233
+ end
234
+
235
+ def next_values
236
+ result = self.peek_values
237
+ peek_reset
238
+ result
239
+ end
240
+
241
+ def next
242
+ result = self.peek
243
+ peek_reset
244
+ result
245
+ end
246
+
247
+ def rewind
248
+ if !@io
249
+ raise ArgumentError, "already closed."
250
+ end
251
+ @io.rewind
252
+ peek_reset
253
+ nil
254
+ end
255
+
256
+ def subeach_by(&distinguish_value)
257
+ Enumerator.new {|y|
258
+ begin
259
+ vs = self.peek_values
260
+ rescue StopIteration
261
+ next
262
+ end
263
+ dv = distinguish_value.call(*vs)
264
+ while true
265
+ y.yield(*vs)
266
+ self.next_values
267
+ begin
268
+ next_vs = self.peek_values
269
+ rescue StopIteration
270
+ break
271
+ end
272
+ next_dv = distinguish_value.call(*next_vs)
273
+ if dv != next_dv
274
+ break
275
+ end
276
+ vs = next_vs
277
+ dv = next_dv
278
+ end
279
+ nil
280
+ }
281
+ end
282
+ end
283
+ end
284
+
285
+ module Tb::Enum
286
+ # creates a Tb::FileHeaderEnumerator object.
287
+ #
288
+ def to_fileenumerator
289
+ hbuilder = Tb::FileHeaderEnumerator.builder
290
+ self.with_header {|header|
291
+ if header
292
+ hbuilder.header.concat(header - hbuilder.header)
293
+ end
294
+ }.each {|pairs|
295
+ hbuilder.gen(pairs)
296
+ }
297
+ hbuilder.new
298
+ end
299
+ end
300
+
301
+ class Tb::FileHeaderEnumerator < Tb::FileEnumerator
302
+ include Tb::Enum
303
+
304
+ class HBuilder
305
+ def initialize(klass)
306
+ @klass = klass
307
+ @tempfile = Tempfile.new("tb")
308
+ @tempfile.binmode
309
+ @header = []
310
+ end
311
+ attr_reader :header
312
+
313
+ def gen(*objs)
314
+ @header |= objs[0].keys
315
+ Marshal.dump(objs, @tempfile)
316
+ end
317
+
318
+ def new
319
+ @tempfile.close
320
+ @klass.new(
321
+ @header,
322
+ lambda { open(@tempfile.path, "rb") },
323
+ lambda { @tempfile.close(true) })
324
+ end
325
+ end
326
+
327
+ def self.builder
328
+ HBuilder.new(Tb::FileHeaderEnumerator)
329
+ end
330
+
331
+ def self.gen_new
332
+ hgen = self.builder
333
+ return hgen.method(:gen), hgen.method(:new)
334
+ end
335
+
336
+ def initialize(header, open_func, remove_func)
337
+ super open_func, remove_func
338
+ @header = header
339
+ end
340
+
341
+ def header_and_each(header_proc)
342
+ self.use {
343
+ header_proc.call(@header) if header_proc
344
+ begin
345
+ io = @open_func.call
346
+ while true
347
+ objs = Marshal.load(io)
348
+ yield(*objs)
349
+ end
350
+ rescue EOFError
351
+ ensure
352
+ io.close
353
+ end
354
+ }
355
+ end
356
+
357
+ def each(&block)
358
+ if block_given?
359
+ header_and_each(nil, &block)
360
+ else
361
+ Tb::FileEnumerator::Reader.new(self)
362
+ end
363
+ end
364
+
365
+ end