tb 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. data/README +62 -50
  2. data/bin/tb +22 -18
  3. data/lib/tb.rb +35 -19
  4. data/lib/tb/basic.rb +85 -86
  5. data/lib/tb/catreader.rb +33 -116
  6. data/lib/tb/cmd_cat.rb +31 -27
  7. data/lib/tb/cmd_consecutive.rb +45 -35
  8. data/lib/tb/cmd_crop.rb +86 -52
  9. data/lib/tb/cmd_cross.rb +113 -71
  10. data/lib/tb/cmd_cut.rb +49 -44
  11. data/lib/tb/cmd_git_log.rb +193 -0
  12. data/lib/tb/cmd_grep.rb +43 -32
  13. data/lib/tb/cmd_group.rb +63 -39
  14. data/lib/tb/cmd_gsub.rb +53 -43
  15. data/lib/tb/cmd_help.rb +51 -24
  16. data/lib/tb/cmd_join.rb +32 -35
  17. data/lib/tb/cmd_ls.rb +233 -205
  18. data/lib/tb/cmd_mheader.rb +47 -37
  19. data/lib/tb/cmd_nest.rb +94 -0
  20. data/lib/tb/cmd_newfield.rb +29 -33
  21. data/lib/tb/cmd_rename.rb +40 -32
  22. data/lib/tb/cmd_shape.rb +31 -24
  23. data/lib/tb/cmd_sort.rb +46 -25
  24. data/lib/tb/cmd_svn_log.rb +47 -28
  25. data/lib/tb/cmd_tar_tvf.rb +447 -0
  26. data/lib/tb/cmd_to_csv.rb +60 -0
  27. data/lib/tb/cmd_to_json.rb +60 -0
  28. data/lib/tb/cmd_to_pnm.rb +48 -0
  29. data/lib/tb/cmd_to_pp.rb +71 -0
  30. data/lib/tb/cmd_to_tsv.rb +48 -0
  31. data/lib/tb/cmd_to_yaml.rb +52 -0
  32. data/lib/tb/cmd_unnest.rb +118 -0
  33. data/lib/tb/cmdmain.rb +24 -20
  34. data/lib/tb/cmdtop.rb +33 -25
  35. data/lib/tb/cmdutil.rb +26 -66
  36. data/lib/tb/csv.rb +46 -34
  37. data/lib/tb/enum.rb +294 -0
  38. data/lib/tb/enumerable.rb +198 -7
  39. data/lib/tb/enumerator.rb +73 -0
  40. data/lib/tb/fieldset.rb +27 -19
  41. data/lib/tb/fileenumerator.rb +365 -0
  42. data/lib/tb/json.rb +50 -0
  43. data/lib/tb/pager.rb +6 -6
  44. data/lib/tb/pairs.rb +227 -0
  45. data/lib/tb/pnm.rb +23 -22
  46. data/lib/tb/reader.rb +52 -49
  47. data/lib/tb/record.rb +48 -19
  48. data/lib/tb/revcmp.rb +38 -0
  49. data/lib/tb/ropen.rb +74 -57
  50. data/lib/tb/search.rb +25 -21
  51. data/lib/tb/tsv.rb +31 -34
  52. data/sample/excel2csv +24 -20
  53. data/sample/poi-xls2csv.rb +24 -20
  54. data/sample/poi-xls2csv.sh +22 -18
  55. data/sample/tbplot +185 -127
  56. data/test-all-cov.rb +3 -3
  57. data/test-all.rb +1 -1
  58. data/test/test_basic.rb +26 -10
  59. data/test/test_catreader.rb +7 -6
  60. data/test/test_cmd_cat.rb +32 -0
  61. data/test/test_cmd_consecutive.rb +10 -0
  62. data/test/test_cmd_crop.rb +4 -4
  63. data/test/test_cmd_cross.rb +16 -4
  64. data/test/test_cmd_git_log.rb +46 -0
  65. data/test/test_cmd_help.rb +17 -12
  66. data/test/test_cmd_join.rb +21 -1
  67. data/test/test_cmd_ls.rb +3 -4
  68. data/test/test_cmd_mheader.rb +17 -11
  69. data/test/test_cmd_nest.rb +49 -0
  70. data/test/test_cmd_sort.rb +15 -0
  71. data/test/test_cmd_tar_tvf.rb +281 -0
  72. data/test/{test_cmd_csv.rb → test_cmd_to_csv.rb} +35 -21
  73. data/test/{test_cmd_json.rb → test_cmd_to_json.rb} +31 -3
  74. data/test/{test_cmd_pnm.rb → test_cmd_to_pnm.rb} +2 -2
  75. data/test/{test_cmd_pp.rb → test_cmd_to_pp.rb} +4 -4
  76. data/test/{test_cmd_tsv.rb → test_cmd_to_tsv.rb} +4 -4
  77. data/test/{test_cmd_yaml.rb → test_cmd_to_yaml.rb} +3 -3
  78. data/test/test_cmd_unnest.rb +89 -0
  79. data/test/test_cmdtty.rb +19 -13
  80. data/test/test_enumerable.rb +83 -1
  81. data/test/test_fileenumerator.rb +265 -0
  82. data/test/test_json.rb +15 -0
  83. data/test/test_pager.rb +3 -4
  84. data/test/test_pairs.rb +122 -0
  85. data/test/test_pnm.rb +24 -24
  86. data/test/test_reader.rb +35 -13
  87. data/test/test_revcmp.rb +10 -0
  88. data/test/test_tbenum.rb +173 -0
  89. metadata +51 -23
  90. data/lib/tb/cmd_csv.rb +0 -42
  91. data/lib/tb/cmd_json.rb +0 -60
  92. data/lib/tb/cmd_pnm.rb +0 -43
  93. data/lib/tb/cmd_pp.rb +0 -70
  94. data/lib/tb/cmd_tsv.rb +0 -43
  95. data/lib/tb/cmd_yaml.rb +0 -47
data/lib/tb/cmd_cross.rb CHANGED
@@ -1,26 +1,30 @@
1
- # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
- # modification, are permitted provided that the following conditions are met:
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
5
6
  #
6
- # 1. Redistributions of source code must retain the above copyright notice, this
7
- # list of conditions and the following disclaimer.
8
- # 2. Redistributions in binary form must reproduce the above copyright notice,
9
- # this list of conditions and the following disclaimer in the documentation
10
- # and/or other materials provided with the distribution.
11
- # 3. The name of the author may not be used to endorse or promote products
12
- # derived from this software without specific prior written permission.
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
13
16
  #
14
- # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
15
- # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
16
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
17
- # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
19
- # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21
- # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
22
- # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
23
- # OF SUCH DAMAGE.
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
28
 
25
29
  Tb::Cmd.subcommands << 'cross'
26
30
 
@@ -29,7 +33,8 @@ Tb::Cmd.default_option[:opt_cross_compact] = false
29
33
 
30
34
  def (Tb::Cmd).op_cross
31
35
  op = OptionParser.new
32
- op.banner = 'Usage: tb cross [OPTS] HKEY-FIELD1,... VKEY-FIELD1,... [TABLE ...]'
36
+ op.banner = "Usage: tb cross [OPTS] VKEY-FIELD1,... HKEY-FIELD1,... [TABLE ...]\n" +
37
+ "Create a cross table. (a.k.a contingency table, pivot table)"
33
38
  define_common_option(op, "ho", "--no-pager")
34
39
  op.def_option('-a AGGREGATION-SPEC[,NEW-FIELD]',
35
40
  '--aggregate AGGREGATION-SPEC[,NEW-FIELD]') {|arg| Tb::Cmd.opt_cross_fields << arg }
@@ -40,10 +45,10 @@ end
40
45
  def (Tb::Cmd).main_cross(argv)
41
46
  op_cross.parse!(argv)
42
47
  exit_if_help('cross')
43
- err('no hkey-fields given.') if argv.empty?
44
- hkfs = split_field_list_argument(argv.shift)
45
48
  err('no vkey-fields given.') if argv.empty?
46
49
  vkfs = split_field_list_argument(argv.shift)
50
+ err('no hkey-fields given.') if argv.empty?
51
+ hkfs = split_field_list_argument(argv.shift)
47
52
  if Tb::Cmd.opt_cross_fields.empty?
48
53
  opt_cross_fields = [['count', 'count']]
49
54
  else
@@ -54,66 +59,103 @@ def (Tb::Cmd).main_cross(argv)
54
59
  }
55
60
  end
56
61
  argv = ['-'] if argv.empty?
57
- Tb::CatReader.open(argv, Tb::Cmd.opt_N) {|tblreader|
58
- vkis = vkfs.map {|f| tblreader.index_from_field(f) }
59
- hkis = hkfs.map {|f| tblreader.index_from_field(f) }
60
- vset = {}
61
- hset = {}
62
- set = {}
63
- tblreader.each {|ary|
64
- vkvs = ary.values_at(*vkis)
65
- hkvs = ary.values_at(*hkis)
66
- vset[vkvs] = true if !vset.include?(vkvs)
67
- hset[hkvs] = true if !hset.include?(hkvs)
68
- if !set.include?([vkvs, hkvs])
69
- set[[vkvs, hkvs]] = opt_cross_fields.map {|agg_spec, nf|
62
+ creader = Tb::CatReader.open(argv, Tb::Cmd.opt_N)
63
+ er = Tb::Enumerator.new {|y|
64
+ header = nil
65
+ hvs_hash = {}
66
+ hvs_list = nil
67
+ sorted = creader.extsort_by {|pairs|
68
+ hvs = hkfs.map {|f| pairs[f] }
69
+ hvs_hash[hvs] = true
70
+ vcv = vkfs.map {|f| smart_cmp_value(pairs[f]) }
71
+ vcv
72
+ }
73
+ sorted2 = sorted.with_header {|header0|
74
+ header = header0
75
+ (vkfs + hkfs).each {|f|
76
+ if !header0.include?(f)
77
+ err("field not found: #{f}")
78
+ end
79
+ }
80
+ hvs_list = hvs_hash.keys.sort_by {|hvs| hvs.map {|hv| smart_cmp_value(hv) } }
81
+ n = vkfs.length + hvs_list.length * opt_cross_fields.length
82
+ header1 = (1..n).map {|i| i.to_s }
83
+ y.set_header header1
84
+ hkfs.each_with_index {|hkf, i|
85
+ next if Tb::Cmd.opt_cross_compact && i == hkfs.length - 1
86
+ h1 = {}
87
+ j = vkfs.length
88
+ h1[j.to_s] = hkf
89
+ hvs_list.each {|hkvs|
90
+ opt_cross_fields.length.times {
91
+ j += 1
92
+ h1[j.to_s] = hkvs[i]
93
+ }
94
+ }
95
+ y.yield h1
96
+ }
97
+ h2 = {}
98
+ j = 0
99
+ vkfs.each {|vkf|
100
+ j += 1
101
+ h2[j.to_s] = vkf
102
+ }
103
+ hvs_list.each {|hkvs|
104
+ opt_cross_fields.each {|agg_spec, new_field|
105
+ j += 1
106
+ if Tb::Cmd.opt_cross_compact
107
+ h2[j.to_s] = hkvs[-1]
108
+ else
109
+ h2[j.to_s] = new_field
110
+ end
111
+ }
112
+ }
113
+ y.yield h2
114
+ }
115
+ boudary_p = lambda {|pairs1, pairs2|
116
+ vcv1 = vkfs.map {|f| smart_cmp_value(pairs1[f]) }
117
+ vcv2 = vkfs.map {|f| smart_cmp_value(pairs2[f]) }
118
+ vcv1 != vcv2
119
+ }
120
+ aggs = nil
121
+ before = lambda {|_|
122
+ aggs = {}
123
+ }
124
+ body = lambda {|pairs|
125
+ hvs = hkfs.map {|f| pairs[f] }
126
+ if !aggs.has_key?(hvs)
127
+ aggs[hvs] = opt_cross_fields.map {|agg_spec, nf|
70
128
  begin
71
- ag = make_aggregator(agg_spec, tblreader.header)
129
+ make_aggregator(agg_spec, header)
72
130
  rescue ArgumentError
73
131
  err($!.message)
74
132
  end
75
- ag.update(ary)
76
- ag
77
- }
78
- else
79
- set[[vkvs, hkvs]].each {|ag|
80
- ag.update(ary)
81
133
  }
82
134
  end
135
+ ary = header.map {|f| pairs[f] }
136
+ aggs[hvs].each {|agg|
137
+ agg.update(ary)
138
+ }
83
139
  }
84
- vary = vset.keys.sort_by {|a| a.map {|v| smart_cmp_value(v) } }
85
- hary = hset.keys.sort_by {|a| a.map {|v| smart_cmp_value(v) } }
86
- with_output {|out|
87
- Tb.csv_stream_output(out) {|gen|
88
- hkfs.each_with_index {|hkf, i|
89
- next if Tb::Cmd.opt_cross_compact && i == hkfs.length - 1
90
- row = [nil] * (vkfs.length - 1) + [hkf]
91
- hary.each {|hkvs| opt_cross_fields.length.times { row << hkvs[i] } }
92
- gen << row
93
- }
94
- if Tb::Cmd.opt_cross_compact
95
- r = vkfs.dup
96
- hary.each {|hkvs| r.concat([hkvs[-1]] * opt_cross_fields.length) }
97
- gen << r
140
+ after = lambda {|last_pairs|
141
+ ary = vkfs.map {|f| last_pairs[f] }
142
+ hvs_list.each {|hvs|
143
+ if aggs.has_key? hvs
144
+ ary.concat(aggs[hvs].map {|agg| agg.finish })
98
145
  else
99
- r = vkfs.dup
100
- hary.each {|hkvs| r.concat opt_cross_fields.map {|agg_spec, new_field| new_field } }
101
- gen << r
146
+ ary.concat([nil] * opt_cross_fields.length)
102
147
  end
103
- vary.each {|vkvs|
104
- row = vkvs.dup
105
- hary.each {|hkvs|
106
- ags = set[[vkvs, hkvs]]
107
- if !ags
108
- opt_cross_fields.length.times { row << nil }
109
- else
110
- ags.each {|ag| row << ag.finish }
111
- end
112
- }
113
- gen << row
114
- }
115
148
  }
149
+ pairs = {}
150
+ ary.each_with_index {|v, i|
151
+ pairs[(i+1).to_s] = v
152
+ }
153
+ y.yield pairs
116
154
  }
155
+ sorted2.each_group_element(boudary_p, before, body, after)
156
+ }
157
+ with_output {|out|
158
+ er.write_to_csv(out, false)
117
159
  }
118
160
  end
119
161
 
data/lib/tb/cmd_cut.rb CHANGED
@@ -1,26 +1,30 @@
1
- # Copyright (C) 2011 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
- # modification, are permitted provided that the following conditions are met:
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
5
6
  #
6
- # 1. Redistributions of source code must retain the above copyright notice, this
7
- # list of conditions and the following disclaimer.
8
- # 2. Redistributions in binary form must reproduce the above copyright notice,
9
- # this list of conditions and the following disclaimer in the documentation
10
- # and/or other materials provided with the distribution.
11
- # 3. The name of the author may not be used to endorse or promote products
12
- # derived from this software without specific prior written permission.
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
13
16
  #
14
- # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
15
- # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
16
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
17
- # EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
19
- # OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20
- # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21
- # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
22
- # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
23
- # OF SUCH DAMAGE.
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
28
 
25
29
  Tb::Cmd.subcommands << 'cut'
26
30
 
@@ -28,7 +32,8 @@ Tb::Cmd.default_option[:opt_cut_v] = nil
28
32
 
29
33
  def (Tb::Cmd).op_cut
30
34
  op = OptionParser.new
31
- op.banner = 'Usage: tb cut [OPTS] FIELD,... [TABLE]'
35
+ op.banner = "Usage: tb cut [OPTS] FIELD,... [TABLE]\n" +
36
+ "Select columns."
32
37
  define_common_option(op, "hNo", "--no-pager")
33
38
  op.def_option('-v', 'invert match') { Tb::Cmd.opt_cut_v = true }
34
39
  op
@@ -42,35 +47,35 @@ def (Tb::Cmd).main_cut(argv)
42
47
  argv = ['-'] if argv.empty?
43
48
  Tb::CatReader.open(argv, Tb::Cmd.opt_N) {|tblreader|
44
49
  if Tb::Cmd.opt_cut_v
45
- h = {}
46
- fs.each {|f| h[tblreader.index_from_field(f)] = true }
47
- header = nil
48
- if !Tb::Cmd.opt_N
49
- header = []
50
- tblreader.header.each_with_index {|f, i|
51
- header << f if !h[i]
52
- }
53
- end
54
- with_table_stream_output {|gen|
55
- gen.output_header(header)
56
- tblreader.each {|ary|
57
- values = []
58
- ary.each_with_index {|v, i|
59
- values << v if !h[i]
60
- }
61
- gen << values
50
+ er = Tb::Enumerator.new {|y|
51
+ tblreader.with_header {|header0|
52
+ if header0
53
+ y.set_header header0 - fs
54
+ end
55
+ }.each {|pairs|
56
+ y.yield pairs.reject {|k, v| fs.include? k }
62
57
  }
63
58
  }
59
+ with_output {|out|
60
+ er.write_to_csv(out, !Tb::Cmd.opt_N)
61
+ }
64
62
  else
65
- header = tblreader.header
66
- is = []
67
- is = fs.map {|f| tblreader.index_from_field_ex(f) }
68
- with_table_stream_output {|gen|
69
- gen.output_header(is.map {|i| tblreader.field_from_index_ex(i) })
70
- tblreader.each {|ary|
71
- gen << ary.values_at(*is)
63
+ er = Tb::Enumerator.new {|y|
64
+ tblreader.with_header {|header0|
65
+ if header0
66
+ fieldset = Tb::FieldSet.new(*header0)
67
+ fs.each {|f|
68
+ fieldset.index_from_field_ex(f)
69
+ }
70
+ end
71
+ y.set_header fs
72
+ }.each {|pairs|
73
+ y.yield pairs.reject {|k, v| !fs.include?(k) }
72
74
  }
73
75
  }
76
+ with_output {|out|
77
+ er.write_to_csv(out, !Tb::Cmd.opt_N)
78
+ }
74
79
  end
75
80
  }
76
81
  end
data/lib/tb/cmd_git_log.rb ADDED
@@ -0,0 +1,193 @@
1
+ # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions
5
+ # are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following
11
+ # disclaimer in the documentation and/or other materials provided
12
+ # with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote
14
+ # products derived from this software without specific prior
15
+ # written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18
+ # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21
+ # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23
+ # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25
+ # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26
+ # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27
+ # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ Tb::Cmd.subcommands << 'git-log'
30
+
31
+ Tb::Cmd.default_option[:opt_git_log_git_command] = nil
32
+ Tb::Cmd.default_option[:opt_git_log_debug_git_log_file] = nil
33
+
34
+ def (Tb::Cmd).op_git_log
35
+ op = OptionParser.new
36
+ op.banner = "Usage: tb git-log [OPTS] [GIT-DIR ...]\n" +
37
+ "Show the GIT log as a table."
38
+ define_common_option(op, "hNo", "--no-pager")
39
+ op.def_option('--git-command COMMAND', 'specify the git command (default: git)') {|command| Tb::Cmd.opt_git_log_git_command = command }
40
+ op.def_option('--debug-git-log-file FILE', 'specify the result git log (for debug)') {|filename| Tb::Cmd.opt_git_log_debug_git_log_file = filename }
41
+ op
42
+ end
43
+
44
+ Tb::Cmd::GIT_LOG_FORMAT_SPEC = [
45
+ %w[commit %H],
46
+ %w[tree %T],
47
+ %w[parents %P],
48
+ %w[author-name %an],
49
+ %w[author-email %ae],
50
+ %w[author-date %ai],
51
+ %w[committer-name %cn],
52
+ %w[committer-email %ce],
53
+ %w[committer-date %ci],
54
+ %w[ref-names %d],
55
+ %w[encoding %e],
56
+ %w[subject %s],
57
+ %w[body %b],
58
+ %w[raw-body %B],
59
+ %w[notes %N],
60
+ %w[reflog-selector %gD],
61
+ %w[reflog-subject %gs],
62
+ ]
63
+
64
+ Tb::Cmd::GIT_LOG_PRETTY_FORMAT = 'format:%x01commit-separator%x01%n' +
65
+ Tb::Cmd::GIT_LOG_FORMAT_SPEC.map {|k, v| "#{k}:%w(0,0,1)#{v}%w(0,0,0)%n" }.join('') +
66
+ "end-commit%n"
67
+
68
+ Tb::Cmd::GIT_LOG_HEADER = Tb::Cmd::GIT_LOG_FORMAT_SPEC.map {|k, v| k } + ['files']
69
+
70
+ def (Tb::Cmd).git_log_with_git_log(dir)
71
+ if Tb::Cmd.opt_git_log_debug_git_log_file
72
+ File.open(Tb::Cmd.opt_git_log_debug_git_log_file) {|f|
73
+ yield f
74
+ }
75
+ else
76
+ git = Tb::Cmd.opt_git_log_git_command || 'git'
77
+ # depends Ruby 1.9.
78
+ command = [
79
+ git,
80
+ 'log',
81
+ "--pretty=#{Tb::Cmd::GIT_LOG_PRETTY_FORMAT}",
82
+ '--decorate=full',
83
+ '--raw',
84
+ '--abbrev=40',
85
+ '.',
86
+ {:chdir=>dir}
87
+ ]
88
+ IO.popen(command) {|f|
89
+ yield f
90
+ }
91
+ end
92
+ end
93
+
94
+ def (Tb::Cmd).git_log_unescape_filename(filename)
95
+ if /\A"/ =~ filename
96
+ $'.chomp('"').gsub(/\\((\d\d\d)|[abtnvfr"\\])/) {
97
+ str = $1
98
+ if $2
99
+ [str.to_i(8)].pack("C")
100
+ else
101
+ case str
102
+ when 'a' then "\a"
103
+ when 'b' then "\b"
104
+ when 't' then "\t"
105
+ when 'n' then "\n"
106
+ when 'v' then "\v"
107
+ when 'f' then "\f"
108
+ when 'r' then "\r"
109
+ when '"' then '"'
110
+ when '\\' then "\\"
111
+ else
112
+ warn "unexpected escape: #{str.inspect}"
113
+ end
114
+ end
115
+ }
116
+ else
117
+ filename
118
+ end
119
+ end
120
+
121
+ def (Tb::Cmd).git_log_parse_commit(commit_info, files)
122
+ commit_info = commit_info.split(/\n(?=[a-z])/)
123
+ Tb.csv_stream_output(files_csv="") {|gen|
124
+ gen << %w[mode1 mode2 hash1 hash2 status filename]
125
+ files.split(/\n/).each {|file_line|
126
+ if /\A:(\d+) (\d+) ([0-9a-f]+) ([0-9a-f]+) (\S+)\t(.+)\z/ !~ file_line
127
+ warn "unexpected git-log output: #{file_line.inspect}"
128
+ next
129
+ end
130
+ mode1, mode2, hash1, hash2, status, filename = $1, $2, $3, $4, $5, $6
131
+ filename = git_log_unescape_filename(filename)
132
+ gen << [mode1, mode2, hash1, hash2, status, filename]
133
+ }
134
+ }
135
+ h = {}
136
+ commit_info.each {|s|
137
+ if /:/ !~ s
138
+ warn "unexpected git-log output: #{s.inspect}"
139
+ next
140
+ end
141
+ k = $`
142
+ v = $'.gsub(/\n /, "\n") # remove indent generated by %w(0,0,1)
143
+ case k
144
+ when /\A(?:author-date|committer-date)/
145
+ v = v.sub(/\A(\d+-\d\d-\d\d) (\d\d:\d\d:\d\d) ([-+]\d\d\d\d)\z/, '\1T\2\3')
146
+ when /\Aparents\z/
147
+ v = ['parent', *v.split(/ /)].map {|c| c + "\n" }.join("")
148
+ when /\Aref-names\z/
149
+ v = v.strip.gsub(/\A\(|\)\z/, '')
150
+ v = ['ref-name', *v.split(/, /)].map {|c| c + "\n" }.join("")
151
+ end
152
+ h[k] = v
153
+ }
154
+ h['files'] = files_csv
155
+ h
156
+ end
157
+
158
+ def (Tb::Cmd).git_log_each_commit(f)
159
+ while chunk = f.gets("\x01commit-separator\x01\n")
160
+ chunk.chomp!("\x01commit-separator\x01\n")
161
+ next if chunk.empty? # beginning of the output
162
+ if /\nend-commit\n/ !~ chunk
163
+ warn "unexpected git-log output: #{chunk.inspect}"
164
+ next
165
+ end
166
+ commit_info, files = $`, $'
167
+ files.sub!(/\A\n/, '')
168
+ h = git_log_parse_commit(commit_info, files)
169
+ yield h
170
+ end
171
+
172
+ end
173
+
174
+ def (Tb::Cmd).main_git_log(argv)
175
+ op_git_log.parse!(argv)
176
+ exit_if_help('git-log')
177
+ argv = ['.'] if argv.empty?
178
+ er = Tb::Enumerator.new {|y|
179
+ y.set_header Tb::Cmd::GIT_LOG_HEADER
180
+ argv.each {|dir|
181
+ git_log_with_git_log(dir) {|f|
182
+ f.set_encoding("ASCII-8BIT") if f.respond_to? :set_encoding
183
+ git_log_each_commit(f) {|h|
184
+ y.yield h
185
+ }
186
+ }
187
+ }
188
+ }
189
+ with_output {|out|
190
+ er.write_to_csv(out, !Tb::Cmd.opt_N)
191
+ }
192
+ end
193
+