tb 0.5 → 0.6

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -149,6 +149,7 @@ There are more subcommands.
149
149
  tb group [OPTS] KEY-FIELD1,... [TABLE ...]
150
150
  tb cross [OPTS] VKEY-FIELD1,... HKEY-FIELD1,... [TABLE ...]
151
151
  tb melt KEY-FIELDS-LIST [OPTS] [TABLE ...]
152
+ tb unmelt [OPTS] [TABLE ...]
152
153
  tb nest [OPTS] NEWFIELD,OLDFIELD1,OLDFIELD2,... [TABLE ...]
153
154
  tb unnest [OPTS] FIELD [TABLE ...]
154
155
  tb shape [OPTS] [TABLE ...]
@@ -181,6 +182,7 @@ tb help -s shows one line summary of the subcommands.
181
182
  group : Group and aggregate rows.
182
183
  cross : Create a cross table. (a.k.a contingency table, pivot table)
183
184
  melt : split value fields into records.
185
+ unmelt : merge melted records into a record.
184
186
  nest : Nest fields.
185
187
  unnest : Unnest a field.
186
188
  shape : Show table size.
@@ -30,6 +30,7 @@ Tb::Cmd.subcommands << 'tar-tvf'
30
30
 
31
31
  Tb::Cmd.default_option[:opt_tar_tvf_l] = 0
32
32
  Tb::Cmd.default_option[:opt_tar_tvf_ustar] = nil
33
+ Tb::Cmd.default_option[:opt_tar_tvf_hash] = []
33
34
 
34
35
  def (Tb::Cmd).op_tar_tvf
35
36
  op = OptionParser.new
@@ -38,6 +39,7 @@ def (Tb::Cmd).op_tar_tvf
38
39
  define_common_option(op, "hNo", "--no-pager")
39
40
  op.def_option('-l', 'show more attributes.') {|fs| Tb::Cmd.opt_tar_tvf_l += 1 }
40
41
  op.def_option('--ustar', 'ustar format (POSIX.1-1988). No GNU and POSIX.1-2001 extension.') {|fs| Tb::Cmd.opt_tar_tvf_ustar = true }
42
+ op.def_option('--hash=ALGORITHMS', 'hash algorithms such as md5,sha256,sha384,sha512 (default: none)') {|hs| Tb::Cmd.opt_tar_tvf_hash.concat split_field_list_argument(hs) }
41
43
  op
42
44
  end
43
45
 
@@ -74,6 +76,14 @@ Tb::Cmd::TAR_TYPEFLAG = {
74
76
  '7' => :contiguous, # [POSIX] Reserved for high-performance file. (It is come from "contiguous file" (S_IFCTG) of Masscomp?)
75
77
  }
76
78
 
79
# Maps each name accepted by --hash to the name of the constant looked up
# under Digest.  Every supported name is the lower-case spelling of its
# constant, so the table is derived from the list of option names.
Tb::Cmd::TAR_HASH_ALGORITHMS = Hash[
  %w[md5 sha1 sha256 sha384 sha512].map {|option_name|
    [option_name, option_name.upcase]
  }
]
86
+
77
87
  def (Tb::Cmd).tar_tvf_parse_seconds_from_epoch(val)
78
88
  if /\./ =~ val
79
89
  num = ($` + $').to_i
@@ -196,6 +206,41 @@ class Tb::Cmd::TarReader
196
206
  end
197
207
  @offset += size
198
208
  end
209
+
210
# Reads +blocklength+ bytes from @input and digests the leading +filesize+
# bytes of them with every algorithm named in +alg_list+.
#
# blocklength :: number of bytes to consume from @input.
# filesize    :: number of leading bytes that take part in the digests;
#                whatever follows them within +blocklength+ is read but
#                left out of the digests.
# alg_list    :: keys of Tb::Cmd::TAR_HASH_ALGORITHMS, e.g. ["md5", "sha256"].
#
# Returns a Hash mapping each algorithm name to its hex digest and advances
# @offset by +blocklength+.
#
# Raises KeyError for a name missing from Tb::Cmd::TAR_HASH_ALGORITHMS and
# Tb::Cmd::TarFormatError when @input ends before +blocklength+ bytes could
# be read.
def calculate_hash(blocklength, filesize, alg_list)
  digests = {}
  alg_list.each {|alg|
    digest_class = Digest.const_get(Tb::Cmd::TAR_HASH_ALGORITHMS.fetch(alg))
    digests[alg] = digest_class.new
  }
  data_left = filesize
  rest = blocklength
  while 0 < rest
    # Read in pieces of at most 4096 bytes so a large entry is never held
    # in memory as a whole.
    s = rest < 4096 ? rest : 4096
    chunk = @input.read(s)
    if !chunk || chunk.length != s
      # Only file content is hashed, so the description is spelled out
      # here; this method has no +kind+ argument to interpolate.
      warn "premature end of tar archive content (file content)"
      raise Tb::Cmd::TarFormatError
    end
    if data_left < s
      # Drop the bytes past the end of the data.  The length is clamped at
      # zero because String#[] returns nil for a negative length.
      chunk = chunk[0, data_left < 0 ? 0 : data_left]
    end
    digests.each_value {|d| d.update chunk }
    data_left -= s
    rest -= s
  end
  @offset += blocklength
  result = {}
  digests.each {|alg, d| result[alg] = d.hexdigest }
  result
end
199
244
  end
200
245
 
201
246
  def (Tb::Cmd).tar_tvf_read_end_of_archive_indicator(reader)
@@ -291,7 +336,13 @@ def (Tb::Cmd).tar_tvf_each(f)
291
336
  when :link, :symlink, :directory, :character_special, :block_special, :fifo
292
337
  # xxx: hardlink may have contents for posix archive.
293
338
  else
294
- reader.skip(content_blocklength, 'file content')
339
+ if Tb::Cmd.opt_tar_tvf_hash.empty?
340
+ reader.skip(content_blocklength, 'file content')
341
+ else
342
+ reader.calculate_hash(content_blocklength, h[:size], Tb::Cmd.opt_tar_tvf_hash).each {|alg, result|
343
+ h[alg] = result
344
+ }
345
+ end
295
346
  end
296
347
  h[:size_in_tar] = reader.offset - offset
297
348
  yield h
@@ -405,6 +456,10 @@ end
405
456
  def (Tb::Cmd).main_tar_tvf(argv)
406
457
  op_tar_tvf.parse!(argv)
407
458
  exit_if_help('tar-tvf')
459
+ if Tb::Cmd.opt_tar_tvf_hash.any? {|alg| !Tb::Cmd::TAR_HASH_ALGORITHMS[alg] }
460
+ STDERR.puts "Unexpected hash algorithm: #{Tb::Cmd.opt_tar_tvf_hash.reject {|alg| Tb::Cmd::TAR_HASH_ALGORITHMS[alg] }.join(",")}"
461
+ exit false
462
+ end
408
463
  argv = ['-'] if argv.empty?
409
464
  er = Tb::Enumerator.new {|y|
410
465
  if Tb::Cmd.opt_tar_tvf_l == 0
@@ -412,6 +467,7 @@ def (Tb::Cmd).main_tar_tvf(argv)
412
467
  else
413
468
  header = Tb::Cmd::TAR_CSV_LONG_HEADER
414
469
  end
470
+ header += Tb::Cmd.opt_tar_tvf_hash
415
471
  y.set_header header
416
472
  argv.each {|filename|
417
473
  tar_tvf_open_with(filename) {|f|
@@ -436,6 +492,7 @@ def (Tb::Cmd).main_tar_tvf(argv)
436
492
  formatted["tar_typeflag"] = h[:typeflag]
437
493
  formatted["tar_magic"] = h[:magic]
438
494
  formatted["tar_version"] = h[:version]
495
+ Tb::Cmd.opt_tar_tvf_hash.each {|alg| formatted[alg] = h[alg] }
439
496
  y.yield Hash[header.map {|f2| [f2, formatted[f2]] }]
440
497
  }
441
498
  }
@@ -32,6 +32,7 @@ Tb::Cmd.default_option[:opt_unmelt_recnum] = nil
32
32
  Tb::Cmd.default_option[:opt_unmelt_keys] = []
33
33
  Tb::Cmd.default_option[:opt_unmelt_variable_field] = 'variable'
34
34
  Tb::Cmd.default_option[:opt_unmelt_value_field] = 'value'
35
+ Tb::Cmd.default_option[:opt_unmelt_missing_value] = nil
35
36
 
36
37
  def (Tb::Cmd).op_unmelt
37
38
  op = OptionParser.new
@@ -51,6 +52,9 @@ def (Tb::Cmd).op_unmelt
51
52
  op.def_option('--value-field FIELD', 'value field. (default: value)') {|field|
52
53
  Tb::Cmd.opt_unmelt_value_field = field
53
54
  }
55
+ op.def_option('--missing-value FIELD', 'used for missing values. (default: not specified)') {|value|
56
+ Tb::Cmd.opt_unmelt_missing_value = value
57
+ }
54
58
  op
55
59
  end
56
60
 
@@ -69,6 +73,7 @@ def (Tb::Cmd).main_unmelt(argv)
69
73
  end
70
74
  key_fields += Tb::Cmd.opt_unmelt_keys
71
75
  end
76
+ melt_fields_hash = {}
72
77
  er = Tb::Enumerator.new {|y|
73
78
  creader.chunk {|pairs|
74
79
  keys = {}
@@ -92,6 +97,7 @@ def (Tb::Cmd).main_unmelt(argv)
92
97
  pairs_ary.each {|pairs|
93
98
  var = pairs[Tb::Cmd.opt_unmelt_variable_field]
94
99
  val = pairs[Tb::Cmd.opt_unmelt_value_field]
100
+ melt_fields_hash[var] = true
95
101
  if rec.has_key? var
96
102
  y.yield rec
97
103
  rec = keys.dup
@@ -101,6 +107,20 @@ def (Tb::Cmd).main_unmelt(argv)
101
107
  y.yield rec
102
108
  }
103
109
  }
104
- output_tbenum(er)
110
+ if !Tb::Cmd.opt_unmelt_missing_value
111
+ er2 = er
112
+ else
113
+ er2 = Tb::Enumerator.new {|y|
114
+ er.to_fileenumerator.with_header {|header|
115
+ y.set_header header
116
+ }.each {|pairs|
117
+ melt_fields_hash.each_key {|f|
118
+ pairs[f] ||= Tb::Cmd.opt_unmelt_missing_value
119
+ }
120
+ y.yield pairs
121
+ }
122
+ }
123
+ end
124
+ output_tbenum(er2)
105
125
  end
106
126
 
@@ -32,6 +32,7 @@ require 'pathname'
32
32
  require 'etc'
33
33
  require 'time'
34
34
  require 'enumerator'
35
+ require 'digest'
35
36
  require 'tb/pager'
36
37
  require 'tb/cmdutil'
37
38
  require 'tb/cmd_help'
@@ -278,4 +278,30 @@ class TestTbCmdTarTvf < Test::Unit::TestCase
278
278
  }
279
279
  end
280
280
 
281
# Archives a three-byte file and, for every supported algorithm, checks
# that tar-tvf --hash writes exactly two lines and that a line ends with
# the digest of the file content.
def test_hash_short
  content = 'bar'
  File.open('foo', 'w') {|io| io.print content }
  assert(system('tar cf foo.tar foo'))
  %w[MD5 SHA1 SHA256 SHA384 SHA512].each {|digest_class_name|
    option_name = digest_class_name.downcase
    note = "hash algorithm: #{option_name}"
    csv_path = 'o.csv'
    Tb::Cmd.main_tar_tvf(['-o', csv_path, "--hash=#{option_name}", 'foo.tar'])
    csv = File.read(csv_path)
    assert_equal(2, csv.count("\n"), note)
    expected_digest = Digest.const_get(digest_class_name).hexdigest(content)
    assert_match(/,#{Regexp.escape expected_digest}$/, csv, note)
  }
end
293
+
294
# Same check as the short case, but with 16000 bytes of content, which is
# more than a single 4096-byte read of the hashing loop can cover.
def test_hash_long
  content = 'bx' * 8000
  File.open('foo', 'w') {|io| io.print content }
  assert(system('tar cf foo.tar foo'))
  %w[MD5 SHA1 SHA256 SHA384 SHA512].each {|digest_class_name|
    option_name = digest_class_name.downcase
    note = "hash algorithm: #{option_name}"
    csv_path = 'o.csv'
    Tb::Cmd.main_tar_tvf(['-o', csv_path, "--hash=#{option_name}", 'foo.tar'])
    csv = File.read(csv_path)
    assert_equal(2, csv.count("\n"), note)
    expected_digest = Digest.const_get(digest_class_name).hexdigest(content)
    assert_match(/,#{Regexp.escape expected_digest}$/, csv, note)
  }
end
306
+
281
307
  end
@@ -154,4 +154,18 @@ class TestTbCmdUnmelt < Test::Unit::TestCase
154
154
  End
155
155
  end
156
156
 
157
# Each input record supplies only one of the variables x and y; with
# --missing-value=9 the absent one must be written as 9.
def test_missing_value
  input_path = "i.csv"
  output_path = "o.csv"
  File.open(input_path, "w") {|io|
    io << "a,variable,value\n" \
          "0,x,2\n" \
          "1,y,3\n"
  }
  Tb::Cmd.main_unmelt(['-o', output_path, '--missing-value=9', input_path])
  expected = "a,x,y\n" \
             "0,2,9\n" \
             "1,9,3\n"
  assert_equal(expected, File.read(output_path))
end
170
+
157
171
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tb
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: '0.6'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-29 00:00:00.000000000 Z
12
+ date: 2012-04-29 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: ! 'tb is a manipulation tool for table: CSV, TSV, JSON, etc.
15
15