tb 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. data/README +62 -50
  2. data/bin/tb +22 -18
  3. data/lib/tb.rb +35 -19
  4. data/lib/tb/basic.rb +85 -86
  5. data/lib/tb/catreader.rb +33 -116
  6. data/lib/tb/cmd_cat.rb +31 -27
  7. data/lib/tb/cmd_consecutive.rb +45 -35
  8. data/lib/tb/cmd_crop.rb +86 -52
  9. data/lib/tb/cmd_cross.rb +113 -71
  10. data/lib/tb/cmd_cut.rb +49 -44
  11. data/lib/tb/cmd_git_log.rb +193 -0
  12. data/lib/tb/cmd_grep.rb +43 -32
  13. data/lib/tb/cmd_group.rb +63 -39
  14. data/lib/tb/cmd_gsub.rb +53 -43
  15. data/lib/tb/cmd_help.rb +51 -24
  16. data/lib/tb/cmd_join.rb +32 -35
  17. data/lib/tb/cmd_ls.rb +233 -205
  18. data/lib/tb/cmd_mheader.rb +47 -37
  19. data/lib/tb/cmd_nest.rb +94 -0
  20. data/lib/tb/cmd_newfield.rb +29 -33
  21. data/lib/tb/cmd_rename.rb +40 -32
  22. data/lib/tb/cmd_shape.rb +31 -24
  23. data/lib/tb/cmd_sort.rb +46 -25
  24. data/lib/tb/cmd_svn_log.rb +47 -28
  25. data/lib/tb/cmd_tar_tvf.rb +447 -0
  26. data/lib/tb/cmd_to_csv.rb +60 -0
  27. data/lib/tb/cmd_to_json.rb +60 -0
  28. data/lib/tb/cmd_to_pnm.rb +48 -0
  29. data/lib/tb/cmd_to_pp.rb +71 -0
  30. data/lib/tb/cmd_to_tsv.rb +48 -0
  31. data/lib/tb/cmd_to_yaml.rb +52 -0
  32. data/lib/tb/cmd_unnest.rb +118 -0
  33. data/lib/tb/cmdmain.rb +24 -20
  34. data/lib/tb/cmdtop.rb +33 -25
  35. data/lib/tb/cmdutil.rb +26 -66
  36. data/lib/tb/csv.rb +46 -34
  37. data/lib/tb/enum.rb +294 -0
  38. data/lib/tb/enumerable.rb +198 -7
  39. data/lib/tb/enumerator.rb +73 -0
  40. data/lib/tb/fieldset.rb +27 -19
  41. data/lib/tb/fileenumerator.rb +365 -0
  42. data/lib/tb/json.rb +50 -0
  43. data/lib/tb/pager.rb +6 -6
  44. data/lib/tb/pairs.rb +227 -0
  45. data/lib/tb/pnm.rb +23 -22
  46. data/lib/tb/reader.rb +52 -49
  47. data/lib/tb/record.rb +48 -19
  48. data/lib/tb/revcmp.rb +38 -0
  49. data/lib/tb/ropen.rb +74 -57
  50. data/lib/tb/search.rb +25 -21
  51. data/lib/tb/tsv.rb +31 -34
  52. data/sample/excel2csv +24 -20
  53. data/sample/poi-xls2csv.rb +24 -20
  54. data/sample/poi-xls2csv.sh +22 -18
  55. data/sample/tbplot +185 -127
  56. data/test-all-cov.rb +3 -3
  57. data/test-all.rb +1 -1
  58. data/test/test_basic.rb +26 -10
  59. data/test/test_catreader.rb +7 -6
  60. data/test/test_cmd_cat.rb +32 -0
  61. data/test/test_cmd_consecutive.rb +10 -0
  62. data/test/test_cmd_crop.rb +4 -4
  63. data/test/test_cmd_cross.rb +16 -4
  64. data/test/test_cmd_git_log.rb +46 -0
  65. data/test/test_cmd_help.rb +17 -12
  66. data/test/test_cmd_join.rb +21 -1
  67. data/test/test_cmd_ls.rb +3 -4
  68. data/test/test_cmd_mheader.rb +17 -11
  69. data/test/test_cmd_nest.rb +49 -0
  70. data/test/test_cmd_sort.rb +15 -0
  71. data/test/test_cmd_tar_tvf.rb +281 -0
  72. data/test/{test_cmd_csv.rb → test_cmd_to_csv.rb} +35 -21
  73. data/test/{test_cmd_json.rb → test_cmd_to_json.rb} +31 -3
  74. data/test/{test_cmd_pnm.rb → test_cmd_to_pnm.rb} +2 -2
  75. data/test/{test_cmd_pp.rb → test_cmd_to_pp.rb} +4 -4
  76. data/test/{test_cmd_tsv.rb → test_cmd_to_tsv.rb} +4 -4
  77. data/test/{test_cmd_yaml.rb → test_cmd_to_yaml.rb} +3 -3
  78. data/test/test_cmd_unnest.rb +89 -0
  79. data/test/test_cmdtty.rb +19 -13
  80. data/test/test_enumerable.rb +83 -1
  81. data/test/test_fileenumerator.rb +265 -0
  82. data/test/test_json.rb +15 -0
  83. data/test/test_pager.rb +3 -4
  84. data/test/test_pairs.rb +122 -0
  85. data/test/test_pnm.rb +24 -24
  86. data/test/test_reader.rb +35 -13
  87. data/test/test_revcmp.rb +10 -0
  88. data/test/test_tbenum.rb +173 -0
  89. metadata +51 -23
  90. data/lib/tb/cmd_csv.rb +0 -42
  91. data/lib/tb/cmd_json.rb +0 -60
  92. data/lib/tb/cmd_pnm.rb +0 -43
  93. data/lib/tb/cmd_pp.rb +0 -70
  94. data/lib/tb/cmd_tsv.rb +0 -43
  95. data/lib/tb/cmd_yaml.rb +0 -47
data/lib/tb/enumerable.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # lib/tb/enumerable.rb - extensions for Enumerable
2
2
  #
3
- # Copyright (C) 2010 Tanaka Akira <akr@fsij.org>
3
+ # Copyright (C) 2010-2012 Tanaka Akira <akr@fsij.org>
4
4
  #
5
5
  # Redistribution and use in source and binary forms, with or without
6
6
  # modification, are permitted provided that the following conditions are met:
@@ -117,13 +117,13 @@ module Enumerable
117
117
  end
118
118
  value_selector = tb_cat_selector_proc(args.pop)
119
119
  key_selectors = args.map {|a| tb_cat_selector_proc(a) }
120
- has_seed = opts.include? :seed
120
+ has_seed = opts.has_key? :seed
121
121
  seed_value = opts[:seed]
122
- if opts.include?(:update) && opts.include?(:op)
122
+ if opts.has_key?(:update) && opts.has_key?(:op)
123
123
  raise ArgumentError, "both :op and :update option specified"
124
- elsif opts.include? :update
124
+ elsif opts.has_key? :update
125
125
  update_proc = opts[:update].to_proc
126
- elsif opts.include? :op
126
+ elsif opts.has_key? :op
127
127
  op_proc = opts[:op].to_proc
128
128
  update_proc = lambda {|ks, s, v| op_proc.call(s, v) }
129
129
  else
@@ -139,11 +139,11 @@ module Enumerable
139
139
  h = result
140
140
  0.upto(ks.length-2) {|i|
141
141
  k = ks[i]
142
- h[k] = {} if !h.include?(k)
142
+ h[k] = {} if !h.has_key?(k)
143
143
  h = h[k]
144
144
  }
145
145
  lastk = ks.last
146
- if !h.include?(lastk)
146
+ if !h.has_key?(lastk)
147
147
  if has_seed
148
148
  h[lastk] = update_proc.call(ks, seed_value, v)
149
149
  else
@@ -281,4 +281,195 @@ module Enumerable
281
281
  tb_categorize(*(args + [lambda {|e| 1 }, {:update => lambda {|ks, s, v| s + v }}]))
282
282
  end
283
283
 
284
+ def dump_objsfile(title, tempfile)
285
+ tempfile.flush
286
+ path = tempfile
287
+ a = []
288
+ open(path) {|f|
289
+ until f.eof?
290
+ pair = Marshal.load(f)
291
+ a << (pair ? pair.last : :sep)
292
+ end
293
+ }
294
+ puts "#{title}: #{a.inspect}"
295
+ end
296
+ private :dump_objsfile
297
+
298
+ def extsort_by(opts={}, &cmpvalue_from)
299
+ memsize = opts[:memsize] || 10000000
300
+ Enumerator.new {|y|
301
+ extsort_by_internal(memsize, cmpvalue_from, y)
302
+ }
303
+ end
304
+
305
+ def extsort_by_internal(memsize, cmpvalue_from, y)
306
+ tmp1 = Tempfile.new("tbsortA")
307
+ tmp2 = Tempfile.new("tbsortB")
308
+ extsort_by_first_split(tmp1, tmp2, cmpvalue_from, memsize)
309
+ if tmp1.size == 0 && tmp2.size == 0
310
+ return Enumerator.new {|_| }
311
+ end
312
+ tmp3 = Tempfile.new("tbsortC")
313
+ tmp4 = Tempfile.new("tbsortD")
314
+ while tmp2.size != 0
315
+ #dump_objsfile(:tmp1, tmp1)
316
+ #dump_objsfile(:tmp2, tmp2)
317
+ #dump_objsfile(:tmp3, tmp3)
318
+ #dump_objsfile(:tmp4, tmp4)
319
+ extsort_by_merge(tmp1, tmp2, tmp3, tmp4)
320
+ tmp1.rewind
321
+ tmp1.truncate(0)
322
+ tmp2.rewind
323
+ tmp2.truncate(0)
324
+ tmp1, tmp2, tmp3, tmp4 = tmp3, tmp4, tmp1, tmp2
325
+ end
326
+ #dump_objsfile(:tmp1, tmp1)
327
+ #dump_objsfile(:tmp2, tmp2)
328
+ #dump_objsfile(:tmp3, tmp3)
329
+ #dump_objsfile(:tmp4, tmp4)
330
+ extsort_by_strip_cv(tmp1, y)
331
+ ensure
332
+ tmp1.close(true) if tmp1
333
+ tmp2.close(true) if tmp2
334
+ tmp3.close(true) if tmp3
335
+ tmp4.close(true) if tmp4
336
+ end
337
+ private :extsort_by_internal
338
+
339
+ def extsort_by_first_split(tmp1, tmp2, cmpvalue_from, memsize)
340
+ prevobj_cv = nil
341
+ tmp_current, tmp_another = tmp1, tmp2
342
+ buf = []
343
+ buf_size = 0
344
+ buf_mode = true
345
+ self.each_with_index {|obj, i|
346
+ obj_cv = cmpvalue_from.call(obj)
347
+ #p [obj, obj_cv]
348
+ #p [prevobj_cv, buf_mode, obj, obj_cv]
349
+ if buf_mode
350
+ dumped = Marshal.dump([obj_cv, obj])
351
+ buf << [obj_cv, i, dumped]
352
+ buf_size += dumped.size
353
+ if memsize < buf_size
354
+ buf.sort!
355
+ buf.each {|_, _, d|
356
+ tmp_current.write d
357
+ }
358
+ prevobj_cv, = buf.last
359
+ buf.clear
360
+ buf_mode = false
361
+ end
362
+ elsif prevobj_cv <= obj_cv
363
+ Marshal.dump([obj_cv, obj], tmp_current)
364
+ prevobj_cv = obj_cv
365
+ else
366
+ dumped = Marshal.dump([obj_cv, obj])
367
+ Marshal.dump(nil, tmp_current)
368
+ buf = [[obj_cv, i, dumped]]
369
+ buf_size = dumped.size
370
+ buf_mode = true
371
+ tmp_current, tmp_another = tmp_another, tmp_current
372
+ end
373
+ }
374
+ if buf_mode
375
+ buf.sort!
376
+ buf.each {|_, _, d|
377
+ tmp_current.write d
378
+ }
379
+ end
380
+ if !buf_mode || !buf.empty?
381
+ Marshal.dump(nil, tmp_current)
382
+ end
383
+ end
384
+ private :extsort_by_first_split
385
+
386
+ def extsort_by_merge(src1, src2, dst1, dst2)
387
+ src1.rewind
388
+ src2.rewind
389
+ obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
390
+ obj2_cv, obj2 = obj2_pair = Marshal.load(src2)
391
+ prefer1 = true
392
+ while true
393
+ cmp = obj1_cv <=> obj2_cv
394
+ if prefer1 ? cmp > 0 : cmp >= 0
395
+ obj1_pair, obj1_cv, obj1, src1, obj2_pair, obj2_cv, obj2, src2 = obj2_pair, obj2_cv, obj2, src2, obj1_pair, obj1_cv, obj1, src1
396
+ prefer1 = !prefer1
397
+ end
398
+ Marshal.dump([obj1_cv, obj1], dst1)
399
+ obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
400
+ if !obj1_pair
401
+ begin
402
+ Marshal.dump(obj2_pair, dst1)
403
+ obj2_pair = Marshal.load(src2)
404
+ end until !obj2_pair
405
+ Marshal.dump(nil, dst1)
406
+ dst1, dst2 = dst2, dst1
407
+ break if src1.eof?
408
+ break if src2.eof?
409
+ obj1_cv, obj1 = obj1_pair = Marshal.load(src1)
410
+ obj2_cv, obj2 = obj2_pair = Marshal.load(src2)
411
+ end
412
+ end
413
+ if !src1.eof?
414
+ restsrc = src1
415
+ elsif !src2.eof?
416
+ restsrc = src2
417
+ else
418
+ return
419
+ end
420
+ until restsrc.eof?
421
+ restobj_pair = Marshal.load(restsrc)
422
+ Marshal.dump(restobj_pair, dst1)
423
+ end
424
+ end
425
+ private :extsort_by_merge
426
+
427
+ def extsort_by_strip_cv(tmp1, y)
428
+ tmp1.rewind
429
+ while true
430
+ pair = Marshal.load(tmp1)
431
+ break if !pair
432
+ _, obj = pair
433
+ y.yield obj
434
+ end
435
+ end
436
+ private :extsort_by_strip_cv
437
+
438
+ # splits self by _boundary_p_, which is called with each pair of adjacent elements.
439
+ #
440
+ # _before_group_ is called before each group with the first element.
441
+ # _after_group_ is called after each group with the last element.
442
+ # _body_ is called for each element.
443
+ #
444
+ def each_group_element(boundary_p, before_group, body, after_group)
445
+ prev = nil
446
+ first = true
447
+ self.each {|curr|
448
+ if first
449
+ before_group.call(curr)
450
+ body.call(curr)
451
+ prev = curr
452
+ first = false
453
+ elsif boundary_p.call(prev, curr)
454
+ after_group.call(prev)
455
+ before_group.call(curr)
456
+ body.call(curr)
457
+ prev = curr
458
+ else
459
+ body.call(curr)
460
+ prev = curr
461
+ end
462
+ }
463
+ if !first
464
+ after_group.call(prev)
465
+ end
466
+ end
467
+
468
+ def lazy_map
469
+ Enumerator.new {|y|
470
+ self.each {|*vs|
471
+ y.yield(yield(*vs))
472
+ }
473
+ }
474
+ end
284
475
  end
@@ -0,0 +1,73 @@
1
+ # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ class Tb::Yielder
30
+ def initialize(header_proc, each_proc)
31
+ @header_proc_called = false
32
+ @header_proc = header_proc
33
+ @each_proc = each_proc
34
+ end
35
+ attr_reader :header_proc_called
36
+
37
+ def set_header(header)
38
+ raise ArgumentError, "set_header called twice" if @header_proc_called
39
+ @header_proc_called = true
40
+ @header_proc.call(header) if @header_proc
41
+ end
42
+
43
+ def yield(*args)
44
+ if !@header_proc_called
45
+ set_header(nil)
46
+ end
47
+ @each_proc.call(*args)
48
+ end
49
+ alias << yield
50
+ end
51
+
52
+ class Tb::Enumerator
53
+ include Tb::Enum
54
+
55
+ def initialize(&enumerator_proc)
56
+ @enumerator_proc = enumerator_proc
57
+ end
58
+
59
+ def each(&each_proc)
60
+ yielder = Tb::Yielder.new(nil, each_proc)
61
+ @enumerator_proc.call(yielder)
62
+ nil
63
+ end
64
+
65
+ def header_and_each(header_proc, &each_proc)
66
+ yielder = Tb::Yielder.new(header_proc, each_proc)
67
+ @enumerator_proc.call(yielder)
68
+ if !yielder.header_proc_called
69
+ header_proc.call(nil)
70
+ end
71
+ nil
72
+ end
73
+ end
data/lib/tb/fieldset.rb CHANGED
@@ -1,30 +1,38 @@
1
1
  # lib/tb/fieldset.rb - Tb::FieldSet class
2
2
  #
3
- # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
3
+ # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
4
4
  #
5
5
  # Redistribution and use in source and binary forms, with or without
6
- # modification, are permitted provided that the following conditions are met:
6
+ # modification, are permitted provided that the following conditions
7
+ # are met:
7
8
  #
8
- # 1. Redistributions of source code must retain the above copyright notice, this
9
- # list of conditions and the following disclaimer.
10
- # 2. Redistributions in binary form must reproduce the above copyright notice,
11
- # this list of conditions and the following disclaimer in the documentation
12
- # and/or other materials provided with the distribution.
13
- # 3. The name of the author may not be used to endorse or promote products
14
- # derived from this software without specific prior written permission.
9
+ # 1. Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials provided
14
+ # with the distribution.
15
+ # 3. The name of the author may not be used to endorse or promote
16
+ # products derived from this software without specific prior
17
+ # written permission.
15
18
  #
16
- # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17
- # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19
- # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21
- # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
- # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24
- # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
25
- # OF SUCH DAMAGE.
19
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
23
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
25
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
30
 
27
31
  class Tb::FieldSet
32
+ def self.normalize(header)
33
+ Tb::FieldSet.new(*header).header
34
+ end
35
+
28
36
  def initialize(*fs)
29
37
  @header = []
30
38
  @field2index = {}
@@ -0,0 +1,365 @@
1
+ # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ module Enumerable
30
+ # creates a Tb::FileEnumerator object.
31
+ #
32
+ def to_fileenumerator
33
+ Tb::FileEnumerator.new_tempfile {|gen|
34
+ self.each {|*objs|
35
+ gen.call(*objs)
36
+ }
37
+ }
38
+ end
39
+ end
40
+
41
+ # Tb::FileEnumerator is an enumerator backed by a temporary file.
42
+ #
43
+ # An instance of Tb::FileEnumerator can be used just once,
44
+ # except if Tb::FileEnumerator#use is explicitly used.
45
+ #
46
+ # After the use, the temporary file is removed.
47
+ # If the object is not used, the temporary file is removed by GC
48
+ # as with an ordinary Tempfile object.
49
+ #
50
+ class Tb::FileEnumerator
51
+ include Enumerable
52
+
53
+ class Builder
54
+ def initialize(klass)
55
+ @klass = klass
56
+ @tempfile = Tempfile.new("tb")
57
+ @tempfile.binmode
58
+ end
59
+
60
+ def gen(*objs)
61
+ Marshal.dump(objs, @tempfile)
62
+ end
63
+
64
+ def new
65
+ @tempfile.close
66
+ @klass.new(
67
+ lambda { open(@tempfile.path, "rb") },
68
+ lambda { @tempfile.close(true) })
69
+ end
70
+ end
71
+
72
+ def self.builder
73
+ Builder.new(Tb::FileEnumerator)
74
+ end
75
+
76
+ def self.new_tempfile
77
+ gen, new = self.gen_new
78
+ yield gen
79
+ new.call
80
+ end
81
+
82
+ def self.gen_new
83
+ builder = self.builder
84
+ return builder.method(:gen), builder.method(:new)
85
+ end
86
+
87
+ def initialize(open_func, remove_func)
88
+ @use_count = 0
89
+ @open_func = open_func
90
+ @remove_func = remove_func
91
+ end
92
+
93
+ def use_count_up
94
+ @use_count += 1
95
+ end
96
+ private :use_count_up
97
+
98
+ def use_count_down
99
+ @use_count -= 1
100
+ if @use_count == 0
101
+ @remove_func.call
102
+ @open_func = @remove_func = nil
103
+ end
104
+ end
105
+ private :use_count_down
106
+
107
+ # delay removing the tempfile until the given block is finished.
108
+ def use
109
+ if !@open_func
110
+ raise ArgumentError, "FileEnumerator reused."
111
+ end
112
+ use_count_up
113
+ begin
114
+ yield
115
+ ensure
116
+ use_count_down
117
+ end
118
+ end
119
+
120
+ def each
121
+ if block_given?
122
+ self.use {
123
+ begin
124
+ io = @open_func.call
125
+ while true
126
+ objs = Marshal.load(io)
127
+ yield(*objs)
128
+ end
129
+ rescue EOFError
130
+ ensure
131
+ io.close
132
+ end
133
+ }
134
+ else
135
+ Reader.new(self)
136
+ end
137
+ end
138
+
139
+ def open_reader
140
+ reader = Reader.new(self)
141
+ reader.use {
142
+ yield reader
143
+ }
144
+ end
145
+
146
+ class Reader
147
+ def initialize(fileenumerator)
148
+ @use_count = 0
149
+ @fileenumerator = fileenumerator
150
+ @fileenumerator.send(:use_count_up)
151
+ @io = @fileenumerator.instance_eval { @open_func.call }
152
+ peek_reset
153
+ end
154
+
155
+ def closed?
156
+ @io.nil?
157
+ end
158
+
159
+ def finalize
160
+ @io.close
161
+ @io = nil
162
+ @fileenumerator.send(:use_count_down)
163
+ peek_reset
164
+ end
165
+ private :finalize
166
+
167
+ def use_begin
168
+ @use_count += 1
169
+ end
170
+
171
+ def use_end
172
+ @use_count -= 1
173
+ if @use_count == 0
174
+ finalize
175
+ end
176
+ end
177
+
178
+ def use
179
+ use_begin
180
+ begin
181
+ yield
182
+ ensure
183
+ use_end
184
+ end
185
+ end
186
+
187
+ def peek_reset
188
+ @peeked = false
189
+ @peeked_objs = nil
190
+ if @io
191
+ @pos = @io.pos
192
+ else
193
+ @pos = nil
194
+ end
195
+ end
196
+ private :peek_reset
197
+
198
+ def pos
199
+ @pos
200
+ end
201
+
202
+ def pos=(val)
203
+ @io.seek(val)
204
+ peek_reset
205
+ nil
206
+ end
207
+
208
+ def peek_values
209
+ if !@io
210
+ raise StopIteration
211
+ end
212
+ if !@peeked
213
+ begin
214
+ objs = Marshal.load(@io)
215
+ rescue EOFError
216
+ if @use_count == 0
217
+ finalize
218
+ end
219
+ raise StopIteration
220
+ end
221
+ @peeked = true
222
+ @peeked_objs = objs
223
+ end
224
+ @peeked_objs
225
+ end
226
+
227
+ def peek
228
+ result = self.peek_values
229
+ if result.kind_of?(Array) && result.length == 1
230
+ result = result[0]
231
+ end
232
+ result
233
+ end
234
+
235
+ def next_values
236
+ result = self.peek_values
237
+ peek_reset
238
+ result
239
+ end
240
+
241
+ def next
242
+ result = self.peek
243
+ peek_reset
244
+ result
245
+ end
246
+
247
+ def rewind
248
+ if !@io
249
+ raise ArgumentError, "already closed."
250
+ end
251
+ @io.rewind
252
+ peek_reset
253
+ nil
254
+ end
255
+
256
+ def subeach_by(&distinguish_value)
257
+ Enumerator.new {|y|
258
+ begin
259
+ vs = self.peek_values
260
+ rescue StopIteration
261
+ next
262
+ end
263
+ dv = distinguish_value.call(*vs)
264
+ while true
265
+ y.yield(*vs)
266
+ self.next_values
267
+ begin
268
+ next_vs = self.peek_values
269
+ rescue StopIteration
270
+ break
271
+ end
272
+ next_dv = distinguish_value.call(*next_vs)
273
+ if dv != next_dv
274
+ break
275
+ end
276
+ vs = next_vs
277
+ dv = next_dv
278
+ end
279
+ nil
280
+ }
281
+ end
282
+ end
283
+ end
284
+
285
+ module Tb::Enum
286
+ # creates a Tb::FileHeaderEnumerator object.
287
+ #
288
+ def to_fileenumerator
289
+ hbuilder = Tb::FileHeaderEnumerator.builder
290
+ self.with_header {|header|
291
+ if header
292
+ hbuilder.header.concat(header - hbuilder.header)
293
+ end
294
+ }.each {|pairs|
295
+ hbuilder.gen(pairs)
296
+ }
297
+ hbuilder.new
298
+ end
299
+ end
300
+
301
+ class Tb::FileHeaderEnumerator < Tb::FileEnumerator
302
+ include Tb::Enum
303
+
304
+ class HBuilder
305
+ def initialize(klass)
306
+ @klass = klass
307
+ @tempfile = Tempfile.new("tb")
308
+ @tempfile.binmode
309
+ @header = []
310
+ end
311
+ attr_reader :header
312
+
313
+ def gen(*objs)
314
+ @header |= objs[0].keys
315
+ Marshal.dump(objs, @tempfile)
316
+ end
317
+
318
+ def new
319
+ @tempfile.close
320
+ @klass.new(
321
+ @header,
322
+ lambda { open(@tempfile.path, "rb") },
323
+ lambda { @tempfile.close(true) })
324
+ end
325
+ end
326
+
327
+ def self.builder
328
+ HBuilder.new(Tb::FileHeaderEnumerator)
329
+ end
330
+
331
+ def self.gen_new
332
+ hgen = self.builder
333
+ return hgen.method(:gen), hgen.method(:new)
334
+ end
335
+
336
+ def initialize(header, open_func, remove_func)
337
+ super open_func, remove_func
338
+ @header = header
339
+ end
340
+
341
+ def header_and_each(header_proc)
342
+ self.use {
343
+ header_proc.call(@header) if header_proc
344
+ begin
345
+ io = @open_func.call
346
+ while true
347
+ objs = Marshal.load(io)
348
+ yield(*objs)
349
+ end
350
+ rescue EOFError
351
+ ensure
352
+ io.close
353
+ end
354
+ }
355
+ end
356
+
357
+ def each(&block)
358
+ if block_given?
359
+ header_and_each(nil, &block)
360
+ else
361
+ Tb::FileEnumerator::Reader.new(self)
362
+ end
363
+ end
364
+
365
+ end