tb 0.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README +62 -50
- data/bin/tb +22 -18
- data/lib/tb.rb +35 -19
- data/lib/tb/basic.rb +85 -86
- data/lib/tb/catreader.rb +33 -116
- data/lib/tb/cmd_cat.rb +31 -27
- data/lib/tb/cmd_consecutive.rb +45 -35
- data/lib/tb/cmd_crop.rb +86 -52
- data/lib/tb/cmd_cross.rb +113 -71
- data/lib/tb/cmd_cut.rb +49 -44
- data/lib/tb/cmd_git_log.rb +193 -0
- data/lib/tb/cmd_grep.rb +43 -32
- data/lib/tb/cmd_group.rb +63 -39
- data/lib/tb/cmd_gsub.rb +53 -43
- data/lib/tb/cmd_help.rb +51 -24
- data/lib/tb/cmd_join.rb +32 -35
- data/lib/tb/cmd_ls.rb +233 -205
- data/lib/tb/cmd_mheader.rb +47 -37
- data/lib/tb/cmd_nest.rb +94 -0
- data/lib/tb/cmd_newfield.rb +29 -33
- data/lib/tb/cmd_rename.rb +40 -32
- data/lib/tb/cmd_shape.rb +31 -24
- data/lib/tb/cmd_sort.rb +46 -25
- data/lib/tb/cmd_svn_log.rb +47 -28
- data/lib/tb/cmd_tar_tvf.rb +447 -0
- data/lib/tb/cmd_to_csv.rb +60 -0
- data/lib/tb/cmd_to_json.rb +60 -0
- data/lib/tb/cmd_to_pnm.rb +48 -0
- data/lib/tb/cmd_to_pp.rb +71 -0
- data/lib/tb/cmd_to_tsv.rb +48 -0
- data/lib/tb/cmd_to_yaml.rb +52 -0
- data/lib/tb/cmd_unnest.rb +118 -0
- data/lib/tb/cmdmain.rb +24 -20
- data/lib/tb/cmdtop.rb +33 -25
- data/lib/tb/cmdutil.rb +26 -66
- data/lib/tb/csv.rb +46 -34
- data/lib/tb/enum.rb +294 -0
- data/lib/tb/enumerable.rb +198 -7
- data/lib/tb/enumerator.rb +73 -0
- data/lib/tb/fieldset.rb +27 -19
- data/lib/tb/fileenumerator.rb +365 -0
- data/lib/tb/json.rb +50 -0
- data/lib/tb/pager.rb +6 -6
- data/lib/tb/pairs.rb +227 -0
- data/lib/tb/pnm.rb +23 -22
- data/lib/tb/reader.rb +52 -49
- data/lib/tb/record.rb +48 -19
- data/lib/tb/revcmp.rb +38 -0
- data/lib/tb/ropen.rb +74 -57
- data/lib/tb/search.rb +25 -21
- data/lib/tb/tsv.rb +31 -34
- data/sample/excel2csv +24 -20
- data/sample/poi-xls2csv.rb +24 -20
- data/sample/poi-xls2csv.sh +22 -18
- data/sample/tbplot +185 -127
- data/test-all-cov.rb +3 -3
- data/test-all.rb +1 -1
- data/test/test_basic.rb +26 -10
- data/test/test_catreader.rb +7 -6
- data/test/test_cmd_cat.rb +32 -0
- data/test/test_cmd_consecutive.rb +10 -0
- data/test/test_cmd_crop.rb +4 -4
- data/test/test_cmd_cross.rb +16 -4
- data/test/test_cmd_git_log.rb +46 -0
- data/test/test_cmd_help.rb +17 -12
- data/test/test_cmd_join.rb +21 -1
- data/test/test_cmd_ls.rb +3 -4
- data/test/test_cmd_mheader.rb +17 -11
- data/test/test_cmd_nest.rb +49 -0
- data/test/test_cmd_sort.rb +15 -0
- data/test/test_cmd_tar_tvf.rb +281 -0
- data/test/{test_cmd_csv.rb → test_cmd_to_csv.rb} +35 -21
- data/test/{test_cmd_json.rb → test_cmd_to_json.rb} +31 -3
- data/test/{test_cmd_pnm.rb → test_cmd_to_pnm.rb} +2 -2
- data/test/{test_cmd_pp.rb → test_cmd_to_pp.rb} +4 -4
- data/test/{test_cmd_tsv.rb → test_cmd_to_tsv.rb} +4 -4
- data/test/{test_cmd_yaml.rb → test_cmd_to_yaml.rb} +3 -3
- data/test/test_cmd_unnest.rb +89 -0
- data/test/test_cmdtty.rb +19 -13
- data/test/test_enumerable.rb +83 -1
- data/test/test_fileenumerator.rb +265 -0
- data/test/test_json.rb +15 -0
- data/test/test_pager.rb +3 -4
- data/test/test_pairs.rb +122 -0
- data/test/test_pnm.rb +24 -24
- data/test/test_reader.rb +35 -13
- data/test/test_revcmp.rb +10 -0
- data/test/test_tbenum.rb +173 -0
- metadata +51 -23
- data/lib/tb/cmd_csv.rb +0 -42
- data/lib/tb/cmd_json.rb +0 -60
- data/lib/tb/cmd_pnm.rb +0 -43
- data/lib/tb/cmd_pp.rb +0 -70
- data/lib/tb/cmd_tsv.rb +0 -43
- data/lib/tb/cmd_yaml.rb +0 -47
data/lib/tb/enumerable.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# lib/tb/enumerable.rb - extensions for Enumerable
|
2
2
|
#
|
3
|
-
# Copyright (C) 2010 Tanaka Akira <akr@fsij.org>
|
3
|
+
# Copyright (C) 2010-2012 Tanaka Akira <akr@fsij.org>
|
4
4
|
#
|
5
5
|
# Redistribution and use in source and binary forms, with or without
|
6
6
|
# modification, are permitted provided that the following conditions are met:
|
@@ -117,13 +117,13 @@ module Enumerable
|
|
117
117
|
end
|
118
118
|
value_selector = tb_cat_selector_proc(args.pop)
|
119
119
|
key_selectors = args.map {|a| tb_cat_selector_proc(a) }
|
120
|
-
has_seed = opts.
|
120
|
+
has_seed = opts.has_key? :seed
|
121
121
|
seed_value = opts[:seed]
|
122
|
-
if opts.
|
122
|
+
if opts.has_key?(:update) && opts.has_key?(:op)
|
123
123
|
raise ArgumentError, "both :op and :update option specified"
|
124
|
-
elsif opts.
|
124
|
+
elsif opts.has_key? :update
|
125
125
|
update_proc = opts[:update].to_proc
|
126
|
-
elsif opts.
|
126
|
+
elsif opts.has_key? :op
|
127
127
|
op_proc = opts[:op].to_proc
|
128
128
|
update_proc = lambda {|ks, s, v| op_proc.call(s, v) }
|
129
129
|
else
|
@@ -139,11 +139,11 @@ module Enumerable
|
|
139
139
|
h = result
|
140
140
|
0.upto(ks.length-2) {|i|
|
141
141
|
k = ks[i]
|
142
|
-
h[k] = {} if !h.
|
142
|
+
h[k] = {} if !h.has_key?(k)
|
143
143
|
h = h[k]
|
144
144
|
}
|
145
145
|
lastk = ks.last
|
146
|
-
if !h.
|
146
|
+
if !h.has_key?(lastk)
|
147
147
|
if has_seed
|
148
148
|
h[lastk] = update_proc.call(ks, seed_value, v)
|
149
149
|
else
|
@@ -281,4 +281,195 @@ module Enumerable
|
|
281
281
|
tb_categorize(*(args + [lambda {|e| 1 }, {:update => lambda {|ks, s, v| s + v }}]))
|
282
282
|
end
|
283
283
|
|
284
|
+
# Debugging aid for the external sort: prints the contents of a run file.
#
# _tempfile_ must respond to +flush+ and +path+ (e.g. a Tempfile) and hold
# a sequence of Marshal-dumped records.  Each record is either an array
# whose last element is the sorted object, or nil (a run separator).
#
# One line is written to standard output: _title_ followed by the
# inspected list of objects, with separators shown as :sep.
def dump_objsfile(title, tempfile)
  # make sure everything written so far is visible through a second handle.
  tempfile.flush
  objs = []
  # Marshal data is binary, so it is read back in binary mode.
  # File.open is used instead of Kernel#open so the path is never
  # interpreted as a command.
  File.open(tempfile.path, "rb") {|f|
    until f.eof?
      pair = Marshal.load(f)
      objs << (pair ? pair.last : :sep)
    end
  }
  puts "#{title}: #{objs.inspect}"
end
private :dump_objsfile
|
297
|
+
|
298
|
+
# Returns an Enumerator which yields the elements of self ordered by
# the comparison value the given block computes for each element.
#
# The actual work is done by extsort_by_internal each time the returned
# enumerator is iterated.
#
# opts[:memsize] is the buffer threshold handed to extsort_by_internal;
# 10000000 is used when it is not given.
def extsort_by(opts={}, &cmpvalue_from)
  memsize = opts[:memsize]
  memsize = 10000000 unless memsize
  Enumerator.new do |yielder|
    extsort_by_internal(memsize, cmpvalue_from, yielder)
  end
end
|
304
|
+
|
305
|
+
# Performs the external merge sort behind extsort_by.
#
# memsize       - buffer threshold passed to extsort_by_first_split.
# cmpvalue_from - callable which returns the comparison value of an element.
# y             - yielder which receives the sorted elements.
#
# The elements of self are first split into sorted runs spread over two
# temporary files.  The runs are then merged back and forth between two
# pairs of temporary files until the second file of the current pair is
# empty.  Finally the remaining file is replayed into _y_.
#
# All temporary files are closed and removed when the method exits,
# whether normally or by an exception.
def extsort_by_internal(memsize, cmpvalue_from, y)
  # The files contain Marshal data, so they are switched to binary mode
  # to avoid newline translation on platforms which distinguish it.
  tmp1 = Tempfile.new("tbsortA")
  tmp1.binmode
  tmp2 = Tempfile.new("tbsortB")
  tmp2.binmode
  extsort_by_first_split(tmp1, tmp2, cmpvalue_from, memsize)
  # nothing was written: self is empty and there is nothing to yield.
  return if tmp1.size == 0 && tmp2.size == 0
  tmp3 = Tempfile.new("tbsortC")
  tmp3.binmode
  tmp4 = Tempfile.new("tbsortD")
  tmp4.binmode
  while tmp2.size != 0
    extsort_by_merge(tmp1, tmp2, tmp3, tmp4)
    # the sources are consumed; empty them so they can serve as the
    # destinations of the next pass.
    tmp1.rewind
    tmp1.truncate(0)
    tmp2.rewind
    tmp2.truncate(0)
    tmp1, tmp2, tmp3, tmp4 = tmp3, tmp4, tmp1, tmp2
  end
  extsort_by_strip_cv(tmp1, y)
ensure
  tmp1.close(true) if tmp1
  tmp2.close(true) if tmp2
  tmp3.close(true) if tmp3
  tmp4.close(true) if tmp4
end
private :extsort_by_internal
|
338
|
+
|
339
|
+
# Distributes the elements of self into sorted runs written alternately
# to _tmp1_ and _tmp2_.
#
# Every record is a Marshal-dumped [cmpvalue, obj] pair and every run is
# terminated by a Marshal-dumped nil.
#
# A run starts in buffering mode: records are collected in memory until
# their total dumped size exceeds _memsize_, then sorted and written out.
# After that, following elements are appended directly to the run as long
# as their comparison value is not smaller than the last one written.
# The first element which breaks the order closes the run and starts a
# new one on the other file.
def extsort_by_first_split(tmp1, tmp2, cmpvalue_from, memsize)
  last_cv = nil
  out, other = tmp1, tmp2
  pending = []
  pending_bytes = 0
  buffering = true
  self.each_with_index do |obj, index|
    cv = cmpvalue_from.call(obj)
    if buffering
      record = Marshal.dump([cv, obj])
      # the index keeps the in-memory sort from comparing dumped strings
      # of elements with equal comparison values.
      pending.push([cv, index, record])
      pending_bytes += record.size
      if memsize < pending_bytes
        pending.sort!
        pending.each do |entry|
          out.write entry[2]
        end
        last_cv = pending.last[0]
        pending.clear
        buffering = false
      end
    elsif last_cv <= cv
      Marshal.dump([cv, obj], out)
      last_cv = cv
    else
      record = Marshal.dump([cv, obj])
      Marshal.dump(nil, out)
      pending = [[cv, index, record]]
      pending_bytes = record.size
      buffering = true
      out, other = other, out
    end
  end
  if buffering
    pending.sort!
    pending.each do |entry|
      out.write entry[2]
    end
  end
  # terminate the last run unless nothing at all belongs to it.
  unless buffering && pending.empty?
    Marshal.dump(nil, out)
  end
end
private :extsort_by_first_split
|
385
|
+
|
386
|
+
# Merges the runs stored in _src1_ and _src2_ pairwise.
#
# Both sources hold Marshal-dumped [cmpvalue, obj] pairs, each run being
# terminated by a Marshal-dumped nil.  The n-th run of _src1_ is merged
# with the n-th run of _src2_; the merged runs are written alternately
# to _dst1_ and _dst2_, each again terminated by nil.
#
# When comparison values are equal, the record which came from the
# original _src1_ is written first.
#
# Once one source is exhausted, whatever remains in the other one is
# copied unchanged to the current destination.
def extsort_by_merge(src1, src2, dst1, dst2)
  src1.rewind
  src2.rewind
  pair_a = Marshal.load(src1)
  cv_a, val_a = pair_a
  pair_b = Marshal.load(src2)
  cv_b, val_b = pair_b
  # true while the "a" side is the source originally given as src1.
  a_is_first = true
  while true
    order = cv_a <=> cv_b
    take_b = a_is_first ? order > 0 : order >= 0
    if take_b
      # exchange the two sides so that the record to emit is always "a".
      pair_a, pair_b = pair_b, pair_a
      cv_a, cv_b = cv_b, cv_a
      val_a, val_b = val_b, val_a
      src1, src2 = src2, src1
      a_is_first = !a_is_first
    end
    Marshal.dump([cv_a, val_a], dst1)
    pair_a = Marshal.load(src1)
    cv_a, val_a = pair_a
    next if pair_a
    # the run on side "a" is finished: copy the rest of the run on side "b".
    while true
      Marshal.dump(pair_b, dst1)
      pair_b = Marshal.load(src2)
      break unless pair_b
    end
    Marshal.dump(nil, dst1)
    dst1, dst2 = dst2, dst1
    break if src1.eof? || src2.eof?
    pair_a = Marshal.load(src1)
    cv_a, val_a = pair_a
    pair_b = Marshal.load(src2)
    cv_b, val_b = pair_b
  end
  rest = nil
  if !src1.eof?
    rest = src1
  elsif !src2.eof?
    rest = src2
  end
  return unless rest
  until rest.eof?
    leftover = Marshal.load(rest)
    Marshal.dump(leftover, dst1)
  end
end
private :extsort_by_merge
|
426
|
+
|
427
|
+
# Replays the first run stored in _tmp1_ into the yielder _y_.
#
# _tmp1_ is rewound and Marshal-dumped [cmpvalue, obj] pairs are read
# from it until a nil record is found.  Only the object of each pair is
# yielded; the comparison value is dropped.
def extsort_by_strip_cv(tmp1, y)
  tmp1.rewind
  while (record = Marshal.load(tmp1))
    _cv, obj = record
    y.yield obj
  end
end
private :extsort_by_strip_cv
|
437
|
+
|
438
|
+
# iterates over self, treating it as a sequence of groups.
#
# _boundary_p_ is called with each two adjacent elements; a true result
# means that a new group starts at the second one.
#
# _before_group_ is called with the first element of each group,
# before _body_ is called for it.
# _body_ is called for every element.
# _after_group_ is called with the last element of each group,
# after _body_ was called for it.
#
# Nothing is called if self has no elements.
#
def each_group_element(boundary_p, before_group, body, after_group)
  last = nil
  started = false
  self.each do |elem|
    opens_group = !started || boundary_p.call(last, elem)
    if opens_group
      after_group.call(last) if started
      before_group.call(elem)
    end
    body.call(elem)
    last = elem
    started = true
  end
  after_group.call(last) if started
end
|
467
|
+
|
468
|
+
# Returns an Enumerator which yields the result of the given block for
# each value set produced by #each.
#
# The block is not called until the returned enumerator is iterated.
def lazy_map
  Enumerator.new do |yielder|
    self.each do |*values|
      mapped = yield(*values)
      yielder.yield(mapped)
    end
  end
end
|
284
475
|
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
|
2
|
+
#
|
3
|
+
# Redistribution and use in source and binary forms, with or without
|
4
|
+
# modification, are permitted provided that the following conditions
|
5
|
+
# are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above copyright
|
8
|
+
# notice, this list of conditions and the following disclaimer.
|
9
|
+
# 2. Redistributions in binary form must reproduce the above
|
10
|
+
# copyright notice, this list of conditions and the following
|
11
|
+
# disclaimer in the documentation and/or other materials provided
|
12
|
+
# with the distribution.
|
13
|
+
# 3. The name of the author may not be used to endorse or promote
|
14
|
+
# products derived from this software without specific prior
|
15
|
+
# written permission.
|
16
|
+
#
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
18
|
+
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
19
|
+
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
21
|
+
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
22
|
+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
23
|
+
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
25
|
+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
26
|
+
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
27
|
+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
# Tb::Yielder hands a header and a sequence of values to two callables.
#
# The header is delivered at most once, through _header_proc_.
# Values are delivered through _each_proc_.  If a value arrives before
# any header was set, a nil header is delivered first.
class Tb::Yielder
  # true once set_header has been called (explicitly or via #yield).
  attr_reader :header_proc_called

  # _header_proc_ may be nil, in which case headers are discarded.
  def initialize(header_proc, each_proc)
    @header_proc = header_proc
    @each_proc = each_proc
    @header_proc_called = false
  end

  # delivers _header_ to the header proc.
  #
  # raises ArgumentError if a header was already delivered.
  def set_header(header)
    if @header_proc_called
      raise ArgumentError, "set_header called twice"
    end
    @header_proc_called = true
    return unless @header_proc
    @header_proc.call(header)
  end

  # delivers _args_ to the each proc,
  # after delivering a nil header if none was set yet.
  def yield(*args)
    set_header(nil) unless @header_proc_called
    @each_proc.call(*args)
  end
  alias << yield
end
|
51
|
+
|
52
|
+
# Tb::Enumerator wraps a block which produces a header and values
# through a Tb::Yielder.
#
# The block given to Tb::Enumerator.new is called with a fresh
# Tb::Yielder every time the enumerator is iterated.
class Tb::Enumerator
  include Tb::Enum

  def initialize(&enumerator_proc)
    @enumerator_proc = enumerator_proc
  end

  # calls _each_proc_ for each value the enumerator block yields.
  # The header, if any, is discarded.  Returns nil.
  def each(&each_proc)
    yielder = Tb::Yielder.new(nil, each_proc)
    @enumerator_proc.call(yielder)
    nil
  end

  # calls _header_proc_ once with the header and _each_proc_ for each
  # value the enumerator block yields.  Returns nil.
  #
  # If the enumerator block finishes without setting a header and
  # without yielding any value, _header_proc_ is called with nil so that
  # it is always called exactly once.
  #
  # _header_proc_ may be nil, as Tb::Yielder accepts; no header is
  # delivered in that case.
  def header_and_each(header_proc, &each_proc)
    yielder = Tb::Yielder.new(header_proc, each_proc)
    @enumerator_proc.call(yielder)
    if header_proc && !yielder.header_proc_called
      header_proc.call(nil)
    end
    nil
  end
end
|
data/lib/tb/fieldset.rb
CHANGED
@@ -1,30 +1,38 @@
|
|
1
1
|
# lib/tb/fieldset.rb - Tb::FieldSet class
|
2
2
|
#
|
3
|
-
# Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
|
3
|
+
# Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
|
4
4
|
#
|
5
5
|
# Redistribution and use in source and binary forms, with or without
|
6
|
-
# modification, are permitted provided that the following conditions
|
6
|
+
# modification, are permitted provided that the following conditions
|
7
|
+
# are met:
|
7
8
|
#
|
8
|
-
# 1. Redistributions of source code must retain the above copyright
|
9
|
-
# list of conditions and the following disclaimer.
|
10
|
-
# 2. Redistributions in binary form must reproduce the above
|
11
|
-
# this list of conditions and the following
|
12
|
-
# and/or other materials provided
|
13
|
-
#
|
14
|
-
#
|
9
|
+
# 1. Redistributions of source code must retain the above copyright
|
10
|
+
# notice, this list of conditions and the following disclaimer.
|
11
|
+
# 2. Redistributions in binary form must reproduce the above
|
12
|
+
# copyright notice, this list of conditions and the following
|
13
|
+
# disclaimer in the documentation and/or other materials provided
|
14
|
+
# with the distribution.
|
15
|
+
# 3. The name of the author may not be used to endorse or promote
|
16
|
+
# products derived from this software without specific prior
|
17
|
+
# written permission.
|
15
18
|
#
|
16
|
-
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
17
|
-
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
18
|
-
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
19
|
-
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
# IN
|
25
|
-
# OF
|
19
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
20
|
+
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
21
|
+
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
22
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
23
|
+
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
24
|
+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
25
|
+
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
26
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
27
|
+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
28
|
+
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
29
|
+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
26
30
|
|
27
31
|
class Tb::FieldSet
|
32
|
+
# builds a Tb::FieldSet from the field names in _header_ and
# returns what its +header+ method returns.
def self.normalize(header)
  fieldset = Tb::FieldSet.new(*header)
  fieldset.header
end
|
35
|
+
|
28
36
|
def initialize(*fs)
|
29
37
|
@header = []
|
30
38
|
@field2index = {}
|
@@ -0,0 +1,365 @@
|
|
1
|
+
# Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
|
2
|
+
#
|
3
|
+
# Redistribution and use in source and binary forms, with or without
|
4
|
+
# modification, are permitted provided that the following conditions
|
5
|
+
# are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above copyright
|
8
|
+
# notice, this list of conditions and the following disclaimer.
|
9
|
+
# 2. Redistributions in binary form must reproduce the above
|
10
|
+
# copyright notice, this list of conditions and the following
|
11
|
+
# disclaimer in the documentation and/or other materials provided
|
12
|
+
# with the distribution.
|
13
|
+
# 3. The name of the author may not be used to endorse or promote
|
14
|
+
# products derived from this software without specific prior
|
15
|
+
# written permission.
|
16
|
+
#
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
18
|
+
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
19
|
+
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
20
|
+
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
21
|
+
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
22
|
+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
23
|
+
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
24
|
+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
25
|
+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
26
|
+
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
27
|
+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
module Enumerable
  # creates a Tb::FileEnumerator object.
  #
  # Every value set produced by #each is passed to the generator which
  # Tb::FileEnumerator.new_tempfile supplies to its block.
  #
  def to_fileenumerator
    Tb::FileEnumerator.new_tempfile do |gen|
      self.each do |*objs|
        gen.call(*objs)
      end
    end
  end
end
|
40
|
+
|
41
|
+
# Tb::FileEnumerator is an enumerator backed by a temporary file.
#
# An instance of Tb::FileEnumerator can be used just once,
# except if Tb::FileEnumerator#use is explicitly used.
#
# After the use, the temporary file is removed.
# If the object is not used, the temporary file is removed by GC
# as usual Tempfile object.
#
class Tb::FileEnumerator
  include Enumerable

  # Builder collects records into a temporary file and then creates
  # the enumerator which reads them back.
  class Builder
    # _klass_ is the class instantiated by #new.
    def initialize(klass)
      @klass = klass
      @tempfile = Tempfile.new("tb")
      @tempfile.binmode
    end

    # appends one record, the array of the given arguments, to the file.
    def gen(*objs)
      Marshal.dump(objs, @tempfile)
    end

    # closes the file for writing and returns a new _klass_ instance
    # given a callable which reopens the file for binary reading and
    # a callable which removes the file.
    def new
      @tempfile.close
      @klass.new(
        # File.open is used so that the path is never treated as a command.
        lambda { File.open(@tempfile.path, "rb") },
        lambda { @tempfile.close(true) })
    end
  end

  def self.builder
    Builder.new(Tb::FileEnumerator)
  end

  # yields a generator callable to the block and returns the enumerator
  # containing everything passed to the generator.
  def self.new_tempfile
    gen, new = self.gen_new
    yield gen
    new.call
  end

  # returns two Method objects of a fresh builder: gen and new.
  def self.gen_new
    builder = self.builder
    return builder.method(:gen), builder.method(:new)
  end

  # _open_func_ returns a readable IO positioned at the first record.
  # _remove_func_ removes the backing file.
  def initialize(open_func, remove_func)
    @use_count = 0
    @open_func = open_func
    @remove_func = remove_func
  end

  def use_count_up
    @use_count += 1
  end
  private :use_count_up

  # When the count drops to zero the backing file is removed and the
  # object becomes unusable.
  def use_count_down
    @use_count -= 1
    if @use_count == 0
      @remove_func.call
      @open_func = @remove_func = nil
    end
  end
  private :use_count_down

  # delay removing the tempfile until the given block is finished.
  #
  # raises ArgumentError if the tempfile is already removed.
  def use
    if !@open_func
      raise ArgumentError, "FileEnumerator reused."
    end
    use_count_up
    begin
      yield
    ensure
      use_count_down
    end
  end

  # With a block, yields each record stored in the file.
  # Without a block, returns a Reader for the file.
  def each
    if block_given?
      self.use {
        io = nil
        begin
          io = @open_func.call
          while true
            # Only the end of the file stops the iteration;
            # an EOFError raised by the caller's block is not swallowed.
            begin
              objs = Marshal.load(io)
            rescue EOFError
              break
            end
            yield(*objs)
          end
        ensure
          # io is nil when opening failed; do not mask that error.
          io.close if io
        end
      }
    else
      Reader.new(self)
    end
  end

  # yields a Reader which is kept open until the block is finished.
  def open_reader
    reader = Reader.new(self)
    reader.use {
      yield reader
    }
  end

  # Reader reads the records of a Tb::FileEnumerator one at a time.
  #
  # A reader holds one use count of its enumerator from creation
  # until it is finalized, so the backing file stays available.
  class Reader
    def initialize(fileenumerator)
      @use_count = 0
      @fileenumerator = fileenumerator
      @fileenumerator.send(:use_count_up)
      @io = @fileenumerator.instance_eval { @open_func.call }
      peek_reset
    end

    def closed?
      @io.nil?
    end

    # closes the IO and releases the use count of the enumerator.
    def finalize
      @io.close
      @io = nil
      @fileenumerator.send(:use_count_down)
      peek_reset
    end
    private :finalize

    def use_begin
      @use_count += 1
    end

    def use_end
      @use_count -= 1
      if @use_count == 0
        finalize
      end
    end

    # keeps the reader open until the given block is finished.
    def use
      use_begin
      begin
        yield
      ensure
        use_end
      end
    end

    # forgets the peeked record and records the current IO position.
    def peek_reset
      @peeked = false
      @peeked_objs = nil
      if @io
        @pos = @io.pos
      else
        @pos = nil
      end
    end
    private :peek_reset

    # position of the next record which will be returned, or nil if closed.
    def pos
      @pos
    end

    # moves to the record at the position _val_,
    # which should be a value obtained from #pos.
    #
    # raises ArgumentError if the reader is already closed.
    def pos=(val)
      if !@io
        raise ArgumentError, "already closed."
      end
      @io.seek(val)
      peek_reset
      nil
    end

    # returns the next record as an array without consuming it.
    #
    # raises StopIteration if there is no more record.
    # At the end of the file the reader is finalized
    # unless it is kept open by #use.
    def peek_values
      if !@io
        raise StopIteration
      end
      if !@peeked
        begin
          objs = Marshal.load(@io)
        rescue EOFError
          if @use_count == 0
            finalize
          end
          raise StopIteration
        end
        @peeked = true
        @peeked_objs = objs
      end
      @peeked_objs
    end

    # same as peek_values but a record of exactly one value
    # is returned as the value itself.
    def peek
      result = self.peek_values
      if result.kind_of?(Array) && result.length == 1
        result = result[0]
      end
      result
    end

    # returns the next record as an array and consumes it.
    def next_values
      result = self.peek_values
      peek_reset
      result
    end

    # same as next_values but a record of exactly one value
    # is returned as the value itself.
    def next
      result = self.peek
      peek_reset
      result
    end

    # moves back to the first record.
    #
    # raises ArgumentError if the reader is already closed.
    def rewind
      if !@io
        raise ArgumentError, "already closed."
      end
      @io.rewind
      peek_reset
      nil
    end

    # returns an Enumerator which yields the following records
    # as long as the block returns the same value for them.
    #
    # The record which ends the sequence is not consumed.
    def subeach_by(&distinguish_value)
      Enumerator.new {|y|
        begin
          vs = self.peek_values
        rescue StopIteration
          next
        end
        dv = distinguish_value.call(*vs)
        while true
          y.yield(*vs)
          self.next_values
          begin
            next_vs = self.peek_values
          rescue StopIteration
            break
          end
          next_dv = distinguish_value.call(*next_vs)
          if dv != next_dv
            break
          end
          vs = next_vs
          dv = next_dv
        end
        nil
      }
    end
  end
end
|
284
|
+
|
285
|
+
module Tb::Enum
  # creates a Tb::FileHeaderEnumerator object.
  #
  # The header reported by with_header, if any, is merged into the
  # header of the builder and every element is stored through the builder.
  #
  def to_fileenumerator
    builder = Tb::FileHeaderEnumerator.builder
    headed = self.with_header do |header|
      builder.header.concat(header - builder.header) if header
    end
    headed.each do |pairs|
      builder.gen(pairs)
    end
    builder.new
  end
end
|
300
|
+
|
301
|
+
# Tb::FileHeaderEnumerator is a Tb::FileEnumerator which also
# remembers a header, the list of field names of its records.
class Tb::FileHeaderEnumerator < Tb::FileEnumerator
  include Tb::Enum

  # HBuilder collects records into a temporary file,
  # accumulating the header from the keys of the records.
  class HBuilder
    # _klass_ is the class instantiated by #new.
    def initialize(klass)
      @klass = klass
      @tempfile = Tempfile.new("tb")
      @tempfile.binmode
      @header = []
    end
    attr_reader :header

    # appends one record to the file.
    # The keys of the first argument are added to the header.
    def gen(*objs)
      @header |= objs[0].keys
      Marshal.dump(objs, @tempfile)
    end

    # closes the file for writing and returns a new _klass_ instance
    # given the header, a callable which reopens the file for binary
    # reading and a callable which removes the file.
    def new
      @tempfile.close
      @klass.new(
        @header,
        # File.open is used so that the path is never treated as a command.
        lambda { File.open(@tempfile.path, "rb") },
        lambda { @tempfile.close(true) })
    end
  end

  def self.builder
    HBuilder.new(Tb::FileHeaderEnumerator)
  end

  # returns two Method objects of a fresh builder: gen and new.
  def self.gen_new
    hgen = self.builder
    return hgen.method(:gen), hgen.method(:new)
  end

  def initialize(header, open_func, remove_func)
    super open_func, remove_func
    @header = header
  end

  # calls _header_proc_ with the header, unless it is nil,
  # and then yields each record stored in the file.
  def header_and_each(header_proc)
    self.use {
      header_proc.call(@header) if header_proc
      io = nil
      begin
        io = @open_func.call
        while true
          # Only the end of the file stops the iteration;
          # an EOFError raised by the caller's block is not swallowed.
          begin
            objs = Marshal.load(io)
          rescue EOFError
            break
          end
          yield(*objs)
        end
      ensure
        # io is nil when opening failed; do not mask that error.
        io.close if io
      end
    }
  end

  # With a block, yields each record without reporting the header.
  # Without a block, returns a Tb::FileEnumerator::Reader.
  def each(&block)
    if block_given?
      header_and_each(nil, &block)
    else
      Tb::FileEnumerator::Reader.new(self)
    end
  end

end
|