tb 0.9 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/README +13 -11
  3. data/lib/tb.rb +14 -6
  4. data/lib/tb/catreader.rb +2 -2
  5. data/lib/tb/cmd_consecutive.rb +6 -2
  6. data/lib/tb/cmd_crop.rb +22 -3
  7. data/lib/tb/cmd_cross.rb +24 -0
  8. data/lib/tb/cmd_cut.rb +20 -10
  9. data/lib/tb/cmd_git.rb +20 -7
  10. data/lib/tb/cmd_group.rb +32 -0
  11. data/lib/tb/cmd_gsub.rb +21 -0
  12. data/lib/tb/cmd_join.rb +28 -0
  13. data/lib/tb/cmd_ls.rb +9 -0
  14. data/lib/tb/cmd_melt.rb +15 -0
  15. data/lib/tb/cmd_mheader.rb +15 -0
  16. data/lib/tb/cmd_nest.rb +27 -6
  17. data/lib/tb/cmd_newfield.rb +19 -2
  18. data/lib/tb/cmd_rename.rb +20 -0
  19. data/lib/tb/{cmd_grep.rb → cmd_search.rb} +37 -23
  20. data/lib/tb/cmd_shape.rb +69 -25
  21. data/lib/tb/cmd_sort.rb +20 -0
  22. data/lib/tb/cmd_tar.rb +38 -0
  23. data/lib/tb/cmd_to_json.rb +2 -2
  24. data/lib/tb/cmd_to_ltsv.rb +3 -3
  25. data/lib/tb/cmd_to_pnm.rb +3 -3
  26. data/lib/tb/cmd_to_tsv.rb +3 -3
  27. data/lib/tb/cmd_to_yaml.rb +3 -3
  28. data/lib/tb/cmd_unmelt.rb +15 -0
  29. data/lib/tb/cmd_unnest.rb +31 -7
  30. data/lib/tb/cmdmain.rb +2 -0
  31. data/lib/tb/cmdtop.rb +1 -1
  32. data/lib/tb/cmdutil.rb +9 -62
  33. data/lib/tb/csv.rb +21 -79
  34. data/lib/tb/enumerable.rb +42 -68
  35. data/lib/tb/enumerator.rb +15 -7
  36. data/lib/tb/{fieldset.rb → hashreader.rb} +37 -56
  37. data/lib/tb/hashwriter.rb +54 -0
  38. data/lib/tb/headerreader.rb +108 -0
  39. data/lib/tb/headerwriter.rb +116 -0
  40. data/lib/tb/json.rb +17 -15
  41. data/lib/tb/ltsv.rb +35 -96
  42. data/lib/tb/ndjson.rb +63 -0
  43. data/lib/tb/numericreader.rb +66 -0
  44. data/lib/tb/numericwriter.rb +61 -0
  45. data/lib/tb/pnm.rb +206 -200
  46. data/lib/tb/ropen.rb +54 -59
  47. data/lib/tb/tsv.rb +39 -71
  48. data/sample/excel2csv +24 -25
  49. data/sample/poi-xls2csv.rb +13 -14
  50. data/tb.gemspec +154 -0
  51. data/test/test_cmd_cat.rb +28 -6
  52. data/test/test_cmd_consecutive.rb +8 -3
  53. data/test/test_cmd_cut.rb +14 -4
  54. data/test/test_cmd_git_log.rb +50 -50
  55. data/test/test_cmd_grep.rb +6 -6
  56. data/test/test_cmd_gsub.rb +7 -2
  57. data/test/test_cmd_ls.rb +70 -62
  58. data/test/test_cmd_shape.rb +43 -6
  59. data/test/test_cmd_svn_log.rb +26 -27
  60. data/test/test_cmd_to_csv.rb +10 -5
  61. data/test/test_cmd_to_json.rb +16 -0
  62. data/test/test_cmd_to_ltsv.rb +2 -2
  63. data/test/test_cmd_to_pp.rb +7 -2
  64. data/test/test_csv.rb +74 -62
  65. data/test/test_ex_enumerable.rb +0 -1
  66. data/test/test_fileenumerator.rb +3 -3
  67. data/test/test_headercsv.rb +43 -0
  68. data/test/test_json.rb +2 -2
  69. data/test/test_ltsv.rb +22 -17
  70. data/test/test_ndjson.rb +62 -0
  71. data/test/test_numericcsv.rb +36 -0
  72. data/test/test_pnm.rb +69 -70
  73. data/test/test_reader.rb +27 -124
  74. data/test/test_tbenum.rb +18 -18
  75. data/test/test_tsv.rb +21 -32
  76. data/test/util_tbtest.rb +12 -0
  77. metadata +41 -19
  78. data/lib/tb/basic.rb +0 -1070
  79. data/lib/tb/reader.rb +0 -106
  80. data/lib/tb/record.rb +0 -158
  81. data/test/test_basic.rb +0 -403
  82. data/test/test_fieldset.rb +0 -42
  83. data/test/test_record.rb +0 -61
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2012-2014 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions
@@ -44,13 +44,25 @@ class Tb::Yielder
44
44
  if !@header_proc_called
45
45
  set_header(nil)
46
46
  end
47
+ unless args.is_a?(Array) && args.length == 1 && args[0].is_a?(Hash)
48
+ raise "unexpected args: #{args.inspect}"
49
+ end
47
50
  @base_yielder.yield(*args)
48
51
  end
49
52
  alias << yield
50
53
  end
51
54
 
52
55
  class Tb::Enumerator < Enumerator
53
- include Tb::Enumerable
56
+ include Tb::EnumerableWithEach
57
+
58
+ def self.from_header_and_values(header, *values_list)
59
+ Tb::Enumerator.new {|y|
60
+ y.set_header header
61
+ values_list.each {|values|
62
+ y.yield Hash[header.zip(values)]
63
+ }
64
+ }
65
+ end
54
66
 
55
67
  def self.new(&enumerator_proc)
56
68
  super() {|y|
@@ -58,16 +70,12 @@ class Tb::Enumerator < Enumerator
58
70
  Thread.current[:tb_enumerator_header_proc] = nil
59
71
  ty = Tb::Yielder.new(header_proc, y)
60
72
  enumerator_proc.call(ty)
61
- if !ty.header_proc_called
73
+ if header_proc && !ty.header_proc_called
62
74
  header_proc.call(nil)
63
75
  end
64
76
  }
65
77
  end
66
78
 
67
- def each(&each_proc)
68
- header_and_each(nil, &each_proc)
69
- end
70
-
71
79
  def header_and_each(header_proc, &each_proc)
72
80
  old = Thread.current[:tb_enumerator_header_proc]
73
81
  begin
@@ -1,6 +1,6 @@
1
- # lib/tb/fieldset.rb - Tb::FieldSet class
1
+ # lib/tb/hashreader.rb - reader class for table containing hashes
2
2
  #
3
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
3
+ # Copyright (C) 2014 Tanaka Akira <akr@fsij.org>
4
4
  #
5
5
  # Redistribution and use in source and binary forms, with or without
6
6
  # modification, are permitted provided that the following conditions
@@ -28,75 +28,56 @@
28
28
  # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29
29
  # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
30
 
31
- class Tb::FieldSet
32
- def self.normalize(header)
33
- Tb::FieldSet.new(*header).header
34
- end
31
+ require 'tempfile'
32
+
33
+ class Tb::HashReader
34
+ include Tb::EnumerableWithEach
35
35
 
36
- def initialize(*fs)
37
- @header = []
38
- @field2index = {}
39
- fs.each {|f| add_field(f) }
36
+ def initialize(get_hash)
37
+ @get_hash = get_hash
40
38
  end
41
- attr_reader :header
42
39
 
43
- def add_field(hint)
44
- hint = '1' if hint.nil? || hint == ''
45
- while @field2index[hint]
46
- case hint
47
- when /\A[1-9][0-9]*\z/
48
- hint = (hint.to_i + 1).to_s
49
- when /\([1-9][0-9]*\)\z/
50
- hint = hint.sub(/\(([1-9][0-9]*)\)\z/) { "(#{$1.to_i + 1})" }
51
- else
52
- hint = "#{hint}(2)"
53
- end
54
- end
55
- @field2index[hint] = @header.length
56
- @header << hint
57
- hint
40
+ def header_known?
41
+ false
58
42
  end
59
- private :add_field
60
43
 
61
- def index_from_field_ex(f)
62
- i = @field2index[f]
63
- return i if !i.nil?
64
- if /\A[1-9][0-9]*\z/ !~ f
65
- raise ArgumentError, "unexpected field name: #{f.inspect}"
44
+ def get_named_header
45
+ if defined? @hashreader_header_complete
46
+ return @hashreader_header_complete
66
47
  end
67
- while true
68
- if add_field(nil) == f
69
- return @header.length-1
70
- end
48
+ @hashreader_buffer = []
49
+ while hash = @get_hash.call
50
+ update_header hash
51
+ @hashreader_buffer << hash
71
52
  end
53
+ update_header nil
72
54
  end
73
55
 
74
- def index_from_field(f)
75
- i = @field2index[f]
76
- if i.nil?
77
- raise ArgumentError, "unexpected field name: #{f.inspect}"
56
+ def get_hash
57
+ if defined? @hashreader_buffer
58
+ return @hashreader_buffer.shift
78
59
  end
79
- i
60
+ hash = @get_hash.call
61
+ update_header hash
62
+ hash
80
63
  end
81
64
 
82
- def field_from_index_ex(i)
83
- raise ArgumentError, "negative index: #{i}" if i < 0
84
- until i < @header.length
85
- add_field(nil)
65
+ def update_header(hash)
66
+ unless defined? @hashreader_header_partial
67
+ @hashreader_header_partial = []
86
68
  end
87
- @header[i]
88
- end
89
-
90
- def field_from_index(i)
91
- raise ArgumentError, "negative index: #{i}" if i < 0
92
- f = @header[i]
93
- if f.nil?
94
- raise ArgumentError, "index too big: #{i}"
69
+ if hash
70
+ @hashreader_header_partial.concat(hash.keys - @hashreader_header_partial)
71
+ else
72
+ @hashreader_header_complete = @hashreader_header_partial
95
73
  end
96
- f
97
74
  end
98
75
 
99
- def length
100
- @header.length
76
+ def header_and_each(header_proc)
77
+ header_proc.call(get_named_header) if header_proc
78
+ while hash = get_hash
79
+ yield hash
80
+ end
81
+ nil
101
82
  end
102
83
  end
@@ -0,0 +1,54 @@
1
+ # lib/tb/hashwriter.rb - writer class for table containing hashes
2
+ #
3
+ # Copyright (C) 2014 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions
7
+ # are met:
8
+ #
9
+ # 1. Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials provided
14
+ # with the distribution.
15
+ # 3. The name of the author may not be used to endorse or promote
16
+ # products derived from this software without specific prior
17
+ # written permission.
18
+ #
19
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
23
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
25
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ require 'tempfile'
32
+
33
# Table writer that passes every row hash straight to a callable sink.
#
# No header handling is performed: rows are forwarded one by one as they
# are given to #put_hash.
class Tb::HashWriter
  # put_hash::   callable invoked with each row hash.
  # put_finish:: optional callable invoked with no arguments by #finish.
  def initialize(put_hash, put_finish=nil)
    @put_finish = put_finish
    @put_hash = put_hash
  end

  # Always false.
  def header_required?
    false
  end

  # No-op: the given generator is discarded.
  def header_generator=(gen)
  end

  # Forwards +hash+ to the sink callable.  Always returns nil.
  def put_hash(hash)
    @put_hash.call(hash)
    nil
  end

  # Calls the finish callable when one was supplied; returns its result,
  # or nil when there is none.
  def finish
    return unless @put_finish
    @put_finish.call
  end
end
@@ -0,0 +1,108 @@
1
+ # lib/tb/headerreader.rb - reader class for table with header
2
+ #
3
+ # Copyright (C) 2014 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions
7
+ # are met:
8
+ #
9
+ # 1. Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials provided
14
+ # with the distribution.
15
+ # 3. The name of the author may not be used to endorse or promote
16
+ # products derived from this software without specific prior
17
+ # written permission.
18
+ #
19
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
23
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
25
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
# Reader for tables whose first non-blank row is a header.
#
# Rows are pulled as arrays from the +get_array+ callable (which returns
# nil at end of input) and exposed as hashes keyed by header field.
class Tb::HeaderReader
  include Tb::EnumerableWithEach

  # Optional callable invoked with the header array once it has been read.
  attr_accessor :header_array_hook
  # Optional callable invoked with every data row array.
  attr_accessor :row_array_hook
  # When true (the default), problems are reported with Kernel#warn.
  attr_accessor :enable_warning

  # get_array:: callable returning the next row as an array, or nil when
  #             the input is exhausted.
  def initialize(get_array)
    @get_array = get_array
    @header_array_hook = nil
    @row_array_hook = nil
    @enable_warning = true
  end

  # Always true.
  def header_known?
    true
  end

  # Reads the header on first use.  Rows consisting only of nil or ''
  # are skipped; an exhausted input yields an empty header.  Repeated
  # field names are reported and every occurrence after the first is
  # replaced by nil so that it is ignored later.
  def read_header_once
    return if defined? @header
    @header = @get_array.call
    while @header && @header.all? {|cell| cell.nil? || cell == '' }
      @header = @get_array.call
    end
    @header ||= []
    @header_array_hook.call(@header) if @header_array_hook
    # field name => list of 0-based column indexes carrying that name
    positions = {}
    @header.each_with_index {|name, index|
      (positions[name] ||= []) << index
    }
    if positions.has_key? nil
      warn "Empty header field #{positions[nil].map {|index| index + 1 }.join(',')}" if @enable_warning
    end
    positions.each {|name, indexes|
      next if indexes.length <= 1
      warn "Ambiguous header field: field #{indexes.map {|index| index + 1 }.join(',')} has same name #{name.inspect}" if @enable_warning
      indexes.drop(1).each {|index| @header[index] = nil }
    }
  end
  private :read_header_once

  # Returns the header fields with nil entries removed.
  def get_named_header
    read_header_once
    @header.compact
  end

  # Returns the next row as a hash (field => value), or nil at end of
  # input.  Values beyond the header width are reported and dropped;
  # columns whose header entry is nil are omitted.
  def get_hash
    read_header_once
    row = @get_array.call
    return nil if !row
    @row_array_hook.call(row) if @row_array_hook
    width = @header.length
    if width < row.length
      warn "Header too short: header has #{width} fields but a record has #{row.length} fields : #{row[width..-1].map(&:inspect).join(',')}" if @enable_warning
      row[width..-1] = []
    end
    record = {}
    row.each_with_index {|value, index|
      name = @header[index]
      record[name] = value unless name.nil?
    }
    record
  end

  # Passes the named header to +header_proc+ (when given), then yields
  # every remaining row hash.  Returns nil.
  def header_and_each(header_proc)
    header_proc.call(get_named_header) if header_proc
    record = get_hash
    while record
      yield record
      record = get_hash
    end
    nil
  end
end
@@ -0,0 +1,116 @@
1
+ # lib/tb/headerwriter.rb - writer class for table with header
2
+ #
3
+ # Copyright (C) 2014 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions
7
+ # are met:
8
+ #
9
+ # 1. Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials provided
14
+ # with the distribution.
15
+ # 3. The name of the author may not be used to endorse or promote
16
+ # products derived from this software without specific prior
17
+ # written permission.
18
+ #
19
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
23
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
25
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ require 'tempfile'
32
+
33
# Table writer for output that starts with a header row.
#
# Rows arrive as hashes through #put_hash and are handed to the
# +put_array+ callable as arrays ordered by the header.
#
# * When a header generator is set and returns a header, the header is
#   written at once and each row is written as soon as it arrives.
# * Otherwise rows are marshalled into a Tempfile while the header is
#   accumulated from their keys; #finish then writes the header followed
#   by the buffered rows.
class Tb::HeaderWriter
  # put_array:: callable invoked with the header array and then with
  #             each row array.
  def initialize(put_array)
    @put_array = put_array
    # Defined up front so that an unset (or nil) generator simply means
    # "no header available" instead of raising on nil.call.
    @header_generator = nil
  end

  # Always true.
  def header_required?
    true
  end

  # gen:: callable returning the header array, or nil/false when the
  #       header is not known in advance.
  def header_generator=(gen)
    @header_generator = gen
  end

  # Decides once, on first use, between immediate and buffered output.
  def generate_header_if_possible
    return if defined? @header_use_buffer
    header = @header_generator && @header_generator.call
    if header
      @header_use_buffer = false
      @header = header
      @put_array.call @header
    else
      @header_use_buffer = true
      @header = []
      @header_buffer = Tempfile.new('tb')
    end
  end

  # Writes (or buffers) one row.  Always returns nil.
  def put_hash(hash)
    generate_header_if_possible
    if @header_use_buffer
      put_hash_buffer(hash)
    else
      put_hash_immediate(hash)
    end
    nil
  end

  # Appends the row to the temporary buffer and extends the header with
  # any keys not seen so far.
  def put_hash_buffer(hash)
    Marshal.dump(hash, @header_buffer)
    @header.concat(hash.keys - @header)
  end
  private :put_hash_buffer

  # Flushes buffered output: writes the accumulated header and replays
  # the buffered rows.  Does nothing further in immediate mode.  The
  # temporary file is removed even when the sink raises, and the writer
  # then leaves buffered mode so a repeated call is harmless.
  # Returns nil.
  def finish
    generate_header_if_possible
    return unless @header_use_buffer
    begin
      @header_buffer.rewind
      @put_array.call @header
      # The buffer holds only data dumped by put_hash_buffer above, so
      # Marshal.load is reading trusted input here.
      until @header_buffer.eof?
        put_hash_immediate(Marshal.load(@header_buffer))
      end
    ensure
      @header_buffer.close!
      @header_use_buffer = false
    end
    nil
  end

  # Writes one row as an array aligned with the header.  Fields missing
  # from the header are appended to it (with a warning unless the name
  # is a positive decimal number) and placed at the new column.
  def put_hash_immediate(hash)
    ary = []
    @header.each_with_index {|f, i|
      ary[i] = hash[f] if hash.has_key?(f)
    }
    (hash.keys - @header).each {|f|
      warn "unexpected field: #{f.inspect}" if /\A[1-9][0-9]*\z/ !~ f
      i = @header.length
      @header << f
      ary[i] = hash[f]
    }
    @put_array.call ary
  end
  private :put_hash_immediate
end