tb 0.9 → 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/README +13 -11
  3. data/lib/tb.rb +14 -6
  4. data/lib/tb/catreader.rb +2 -2
  5. data/lib/tb/cmd_consecutive.rb +6 -2
  6. data/lib/tb/cmd_crop.rb +22 -3
  7. data/lib/tb/cmd_cross.rb +24 -0
  8. data/lib/tb/cmd_cut.rb +20 -10
  9. data/lib/tb/cmd_git.rb +20 -7
  10. data/lib/tb/cmd_group.rb +32 -0
  11. data/lib/tb/cmd_gsub.rb +21 -0
  12. data/lib/tb/cmd_join.rb +28 -0
  13. data/lib/tb/cmd_ls.rb +9 -0
  14. data/lib/tb/cmd_melt.rb +15 -0
  15. data/lib/tb/cmd_mheader.rb +15 -0
  16. data/lib/tb/cmd_nest.rb +27 -6
  17. data/lib/tb/cmd_newfield.rb +19 -2
  18. data/lib/tb/cmd_rename.rb +20 -0
  19. data/lib/tb/{cmd_grep.rb → cmd_search.rb} +37 -23
  20. data/lib/tb/cmd_shape.rb +69 -25
  21. data/lib/tb/cmd_sort.rb +20 -0
  22. data/lib/tb/cmd_tar.rb +38 -0
  23. data/lib/tb/cmd_to_json.rb +2 -2
  24. data/lib/tb/cmd_to_ltsv.rb +3 -3
  25. data/lib/tb/cmd_to_pnm.rb +3 -3
  26. data/lib/tb/cmd_to_tsv.rb +3 -3
  27. data/lib/tb/cmd_to_yaml.rb +3 -3
  28. data/lib/tb/cmd_unmelt.rb +15 -0
  29. data/lib/tb/cmd_unnest.rb +31 -7
  30. data/lib/tb/cmdmain.rb +2 -0
  31. data/lib/tb/cmdtop.rb +1 -1
  32. data/lib/tb/cmdutil.rb +9 -62
  33. data/lib/tb/csv.rb +21 -79
  34. data/lib/tb/enumerable.rb +42 -68
  35. data/lib/tb/enumerator.rb +15 -7
  36. data/lib/tb/{fieldset.rb → hashreader.rb} +37 -56
  37. data/lib/tb/hashwriter.rb +54 -0
  38. data/lib/tb/headerreader.rb +108 -0
  39. data/lib/tb/headerwriter.rb +116 -0
  40. data/lib/tb/json.rb +17 -15
  41. data/lib/tb/ltsv.rb +35 -96
  42. data/lib/tb/ndjson.rb +63 -0
  43. data/lib/tb/numericreader.rb +66 -0
  44. data/lib/tb/numericwriter.rb +61 -0
  45. data/lib/tb/pnm.rb +206 -200
  46. data/lib/tb/ropen.rb +54 -59
  47. data/lib/tb/tsv.rb +39 -71
  48. data/sample/excel2csv +24 -25
  49. data/sample/poi-xls2csv.rb +13 -14
  50. data/tb.gemspec +154 -0
  51. data/test/test_cmd_cat.rb +28 -6
  52. data/test/test_cmd_consecutive.rb +8 -3
  53. data/test/test_cmd_cut.rb +14 -4
  54. data/test/test_cmd_git_log.rb +50 -50
  55. data/test/test_cmd_grep.rb +6 -6
  56. data/test/test_cmd_gsub.rb +7 -2
  57. data/test/test_cmd_ls.rb +70 -62
  58. data/test/test_cmd_shape.rb +43 -6
  59. data/test/test_cmd_svn_log.rb +26 -27
  60. data/test/test_cmd_to_csv.rb +10 -5
  61. data/test/test_cmd_to_json.rb +16 -0
  62. data/test/test_cmd_to_ltsv.rb +2 -2
  63. data/test/test_cmd_to_pp.rb +7 -2
  64. data/test/test_csv.rb +74 -62
  65. data/test/test_ex_enumerable.rb +0 -1
  66. data/test/test_fileenumerator.rb +3 -3
  67. data/test/test_headercsv.rb +43 -0
  68. data/test/test_json.rb +2 -2
  69. data/test/test_ltsv.rb +22 -17
  70. data/test/test_ndjson.rb +62 -0
  71. data/test/test_numericcsv.rb +36 -0
  72. data/test/test_pnm.rb +69 -70
  73. data/test/test_reader.rb +27 -124
  74. data/test/test_tbenum.rb +18 -18
  75. data/test/test_tsv.rb +21 -32
  76. data/test/util_tbtest.rb +12 -0
  77. metadata +41 -19
  78. data/lib/tb/basic.rb +0 -1070
  79. data/lib/tb/reader.rb +0 -106
  80. data/lib/tb/record.rb +0 -158
  81. data/test/test_basic.rb +0 -403
  82. data/test/test_fieldset.rb +0 -42
  83. data/test/test_record.rb +0 -61
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2012 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2012-2014 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions
@@ -44,13 +44,25 @@ class Tb::Yielder
44
44
  if !@header_proc_called
45
45
  set_header(nil)
46
46
  end
47
+ unless args.is_a?(Array) && args.length == 1 && args[0].is_a?(Hash)
48
+ raise "unexpected args: #{args.inspect}"
49
+ end
47
50
  @base_yielder.yield(*args)
48
51
  end
49
52
  alias << yield
50
53
  end
51
54
 
52
55
  class Tb::Enumerator < Enumerator
53
- include Tb::Enumerable
56
+ include Tb::EnumerableWithEach
57
+
58
+ def self.from_header_and_values(header, *values_list)
59
+ Tb::Enumerator.new {|y|
60
+ y.set_header header
61
+ values_list.each {|values|
62
+ y.yield Hash[header.zip(values)]
63
+ }
64
+ }
65
+ end
54
66
 
55
67
  def self.new(&enumerator_proc)
56
68
  super() {|y|
@@ -58,16 +70,12 @@ class Tb::Enumerator < Enumerator
58
70
  Thread.current[:tb_enumerator_header_proc] = nil
59
71
  ty = Tb::Yielder.new(header_proc, y)
60
72
  enumerator_proc.call(ty)
61
- if !ty.header_proc_called
73
+ if header_proc && !ty.header_proc_called
62
74
  header_proc.call(nil)
63
75
  end
64
76
  }
65
77
  end
66
78
 
67
- def each(&each_proc)
68
- header_and_each(nil, &each_proc)
69
- end
70
-
71
79
  def header_and_each(header_proc, &each_proc)
72
80
  old = Thread.current[:tb_enumerator_header_proc]
73
81
  begin
@@ -1,6 +1,6 @@
1
- # lib/tb/fieldset.rb - Tb::FieldSet class
1
+ # lib/tb/hashreaderm.rb - reader mixin for table containing hashes
2
2
  #
3
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
3
+ # Copyright (C) 2014 Tanaka Akira <akr@fsij.org>
4
4
  #
5
5
  # Redistribution and use in source and binary forms, with or without
6
6
  # modification, are permitted provided that the following conditions
@@ -28,75 +28,56 @@
28
28
  # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29
29
  # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
30
 
31
- class Tb::FieldSet
32
- def self.normalize(header)
33
- Tb::FieldSet.new(*header).header
34
- end
31
+ require 'tempfile'
32
+
33
+ class Tb::HashReader
34
+ include Tb::EnumerableWithEach
35
35
 
36
- def initialize(*fs)
37
- @header = []
38
- @field2index = {}
39
- fs.each {|f| add_field(f) }
36
+ def initialize(get_hash)
37
+ @get_hash = get_hash
40
38
  end
41
- attr_reader :header
42
39
 
43
- def add_field(hint)
44
- hint = '1' if hint.nil? || hint == ''
45
- while @field2index[hint]
46
- case hint
47
- when /\A[1-9][0-9]*\z/
48
- hint = (hint.to_i + 1).to_s
49
- when /\([1-9][0-9]*\)\z/
50
- hint = hint.sub(/\(([1-9][0-9]*)\)\z/) { "(#{$1.to_i + 1})" }
51
- else
52
- hint = "#{hint}(2)"
53
- end
54
- end
55
- @field2index[hint] = @header.length
56
- @header << hint
57
- hint
40
+ def header_known?
41
+ false
58
42
  end
59
- private :add_field
60
43
 
61
- def index_from_field_ex(f)
62
- i = @field2index[f]
63
- return i if !i.nil?
64
- if /\A[1-9][0-9]*\z/ !~ f
65
- raise ArgumentError, "unexpected field name: #{f.inspect}"
44
+ def get_named_header
45
+ if defined? @hashreader_header_complete
46
+ return @hashreader_header_complete
66
47
  end
67
- while true
68
- if add_field(nil) == f
69
- return @header.length-1
70
- end
48
+ @hashreader_buffer = []
49
+ while hash = @get_hash.call
50
+ update_header hash
51
+ @hashreader_buffer << hash
71
52
  end
53
+ update_header nil
72
54
  end
73
55
 
74
- def index_from_field(f)
75
- i = @field2index[f]
76
- if i.nil?
77
- raise ArgumentError, "unexpected field name: #{f.inspect}"
56
+ def get_hash
57
+ if defined? @hashreader_buffer
58
+ return @hashreader_buffer.shift
78
59
  end
79
- i
60
+ hash = @get_hash.call
61
+ update_header hash
62
+ hash
80
63
  end
81
64
 
82
- def field_from_index_ex(i)
83
- raise ArgumentError, "negative index: #{i}" if i < 0
84
- until i < @header.length
85
- add_field(nil)
65
+ def update_header(hash)
66
+ unless defined? @hashreader_header_partial
67
+ @hashreader_header_partial = []
86
68
  end
87
- @header[i]
88
- end
89
-
90
- def field_from_index(i)
91
- raise ArgumentError, "negative index: #{i}" if i < 0
92
- f = @header[i]
93
- if f.nil?
94
- raise ArgumentError, "index too big: #{i}"
69
+ if hash
70
+ @hashreader_header_partial.concat(hash.keys - @hashreader_header_partial)
71
+ else
72
+ @hashreader_header_complete = @hashreader_header_partial
95
73
  end
96
- f
97
74
  end
98
75
 
99
- def length
100
- @header.length
76
+ def header_and_each(header_proc)
77
+ header_proc.call(get_named_header) if header_proc
78
+ while hash = get_hash
79
+ yield hash
80
+ end
81
+ nil
101
82
  end
102
83
  end
@@ -0,0 +1,54 @@
1
+ # lib/tb/hashwriterm.rb - writer mixin for table containing hashes
2
+ #
3
+ # Copyright (C) 2014 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions
7
+ # are met:
8
+ #
9
+ # 1. Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials provided
14
+ # with the distribution.
15
+ # 3. The name of the author may not be used to endorse or promote
16
+ # products derived from this software without specific prior
17
+ # written permission.
18
+ #
19
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
23
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
25
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ require 'tempfile'
32
+
33
+ class Tb::HashWriter
34
+ def initialize(put_hash, put_finish=nil)
35
+ @put_hash = put_hash
36
+ @put_finish = put_finish
37
+ end
38
+
39
+ def header_required?
40
+ false
41
+ end
42
+
43
+ def header_generator=(gen)
44
+ end
45
+
46
+ def put_hash(hash)
47
+ @put_hash.call hash
48
+ nil
49
+ end
50
+
51
+ def finish
52
+ @put_finish.call if @put_finish
53
+ end
54
+ end
@@ -0,0 +1,108 @@
1
+ # lib/tb/headerreaderm.rb - reader mixin for table with header
2
+ #
3
+ # Copyright (C) 2014 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions
7
+ # are met:
8
+ #
9
+ # 1. Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials provided
14
+ # with the distribution.
15
+ # 3. The name of the author may not be used to endorse or promote
16
+ # products derived from this software without specific prior
17
+ # written permission.
18
+ #
19
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
23
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
25
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ class Tb::HeaderReader
32
+ include Tb::EnumerableWithEach
33
+
34
+ def initialize(get_array)
35
+ @get_array = get_array
36
+ @header_array_hook = nil
37
+ @row_array_hook = nil
38
+ @enable_warning = true
39
+ end
40
+ attr_accessor :header_array_hook
41
+ attr_accessor :row_array_hook
42
+ attr_accessor :enable_warning
43
+
44
+ def header_known?
45
+ true
46
+ end
47
+
48
+ def read_header_once
49
+ return if defined? @header
50
+ begin
51
+ @header = @get_array.call
52
+ end while @header && @header.all? {|elt| elt.nil? || elt == '' }
53
+ if !@header
54
+ @header = []
55
+ end
56
+ @header_array_hook.call(@header) if @header_array_hook
57
+ h = Hash.new { [] }
58
+ @header.each_with_index {|f, i|
59
+ h[f] <<= i
60
+ }
61
+ if h.has_key? nil
62
+ warn "Empty header field #{h[nil].map(&:succ).join(',')}" if @enable_warning
63
+ end
64
+ h.each {|f, is|
65
+ if 1 < is.length
66
+ warn "Ambiguous header field: field #{is.map(&:succ).join(',')} has same name #{f.inspect}" if @enable_warning
67
+ is[1..-1].each {|i|
68
+ @header[i] = nil
69
+ }
70
+ end
71
+ }
72
+ end
73
+ private :read_header_once
74
+
75
+ def get_named_header
76
+ read_header_once
77
+ @header.compact
78
+ end
79
+
80
+ def get_hash
81
+ read_header_once
82
+ ary = @get_array.call
83
+ if !ary
84
+ return nil
85
+ end
86
+ @row_array_hook.call(ary) if @row_array_hook
87
+ hash = {}
88
+ if @header.length < ary.length
89
+ warn "Header too short: header has #{@header.length} fields but a record has #{ary.length} fields : #{ary[@header.length..-1].map(&:inspect).join(',')}" if @enable_warning
90
+ ary[@header.length..-1] = []
91
+ end
92
+ ary.each_with_index {|v, i|
93
+ field = @header[i]
94
+ if !field.nil?
95
+ hash[field] = v
96
+ end
97
+ }
98
+ hash
99
+ end
100
+
101
+ def header_and_each(header_proc)
102
+ header_proc.call(get_named_header) if header_proc
103
+ while hash = get_hash
104
+ yield hash
105
+ end
106
+ nil
107
+ end
108
+ end
@@ -0,0 +1,116 @@
1
+ # lib/tb/headerwriterm.rb - writer mixin for table with header
2
+ #
3
+ # Copyright (C) 2014 Tanaka Akira <akr@fsij.org>
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions
7
+ # are met:
8
+ #
9
+ # 1. Redistributions of source code must retain the above copyright
10
+ # notice, this list of conditions and the following disclaimer.
11
+ # 2. Redistributions in binary form must reproduce the above
12
+ # copyright notice, this list of conditions and the following
13
+ # disclaimer in the documentation and/or other materials provided
14
+ # with the distribution.
15
+ # 3. The name of the author may not be used to endorse or promote
16
+ # products derived from this software without specific prior
17
+ # written permission.
18
+ #
19
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
23
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
25
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ require 'tempfile'
32
+
33
+ class Tb::HeaderWriter
34
+ def initialize(put_array)
35
+ @put_array = put_array
36
+ end
37
+
38
+ def header_required?
39
+ true
40
+ end
41
+
42
+ def header_generator=(gen)
43
+ @header_generator = gen
44
+ end
45
+
46
+ def generate_header_if_possible
47
+ return if defined? @header_use_buffer
48
+ header = nil
49
+ if defined? @header_generator
50
+ header = @header_generator.call
51
+ end
52
+ if header
53
+ @header_use_buffer = false
54
+ @header = header
55
+ @put_array.call @header
56
+ else
57
+ @header_use_buffer = true
58
+ @header = []
59
+ @header_buffer = Tempfile.new('tb')
60
+ end
61
+ end
62
+
63
+ def put_hash(hash)
64
+ generate_header_if_possible
65
+ if @header_use_buffer
66
+ put_hash_buffer(hash)
67
+ else
68
+ put_hash_immediate(hash)
69
+ end
70
+ nil
71
+ end
72
+
73
+ def put_hash_buffer(hash)
74
+ Marshal.dump(hash, @header_buffer)
75
+ (hash.map {|k, v| k } - @header).each {|f|
76
+ @header << f
77
+ }
78
+ end
79
+ private :put_hash_buffer
80
+
81
+ def finish
82
+ generate_header_if_possible
83
+ if @header_use_buffer == nil
84
+ generate_header_if_possible
85
+ end
86
+ if @header_use_buffer
87
+ @header_buffer.rewind
88
+ @put_array.call @header
89
+ begin
90
+ while true
91
+ hash = Marshal.load(@header_buffer)
92
+ put_hash_immediate(hash)
93
+ end
94
+ rescue EOFError
95
+ end
96
+ @header_buffer.close!
97
+ end
98
+ end
99
+
100
+ def put_hash_immediate(hash)
101
+ ary = []
102
+ @header.each_with_index {|f, i|
103
+ if pair = hash.find {|k, v| k == f }
104
+ ary[i] = pair.last
105
+ end
106
+ }
107
+ (hash.map {|k, v| k } - @header).each {|f|
108
+ warn "unexpected field: #{f.inspect}" if /\A[1-9][0-9]*\z/ !~ f
109
+ i = @header.length
110
+ @header << f
111
+ ary[i] = hash[f]
112
+ }
113
+ @put_array.call ary
114
+ end
115
+ private :put_hash_immediate
116
+ end