tb 0.5 → 0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -149,6 +149,7 @@ There are more subcommands.
149
149
  tb group [OPTS] KEY-FIELD1,... [TABLE ...]
150
150
  tb cross [OPTS] VKEY-FIELD1,... HKEY-FIELD1,... [TABLE ...]
151
151
  tb melt KEY-FIELDS-LIST [OPTS] [TABLE ...]
152
+ tb unmelt [OPTS] [TABLE ...]
152
153
  tb nest [OPTS] NEWFIELD,OLDFIELD1,OLDFIELD2,... [TABLE ...]
153
154
  tb unnest [OPTS] FIELD [TABLE ...]
154
155
  tb shape [OPTS] [TABLE ...]
@@ -181,6 +182,7 @@ tb help -s shows one line summary of the subcommands.
181
182
  group : Group and aggregate rows.
182
183
  cross : Create a cross table. (a.k.a contingency table, pivot table)
183
184
  melt : split value fields into records.
185
+ unmelt : merge melted records into a record.
184
186
  nest : Nest fields.
185
187
  unnest : Unnest a field.
186
188
  shape : Show table size.
@@ -30,6 +30,7 @@ Tb::Cmd.subcommands << 'tar-tvf'
30
30
 
31
31
  Tb::Cmd.default_option[:opt_tar_tvf_l] = 0
32
32
  Tb::Cmd.default_option[:opt_tar_tvf_ustar] = nil
33
+ Tb::Cmd.default_option[:opt_tar_tvf_hash] = []
33
34
 
34
35
  def (Tb::Cmd).op_tar_tvf
35
36
  op = OptionParser.new
@@ -38,6 +39,7 @@ def (Tb::Cmd).op_tar_tvf
38
39
  define_common_option(op, "hNo", "--no-pager")
39
40
  op.def_option('-l', 'show more attributes.') {|fs| Tb::Cmd.opt_tar_tvf_l += 1 }
40
41
  op.def_option('--ustar', 'ustar format (POSIX.1-1988). No GNU and POSIX.1-2001 extension.') {|fs| Tb::Cmd.opt_tar_tvf_ustar = true }
42
+ op.def_option('--hash=ALGORITHMS', 'hash algorithms such as md5,sha256,sha384,sha512 (default: none)') {|hs| Tb::Cmd.opt_tar_tvf_hash.concat split_field_list_argument(hs) }
41
43
  op
42
44
  end
43
45
 
@@ -74,6 +76,14 @@ Tb::Cmd::TAR_TYPEFLAG = {
74
76
  '7' => :contiguous, # [POSIX] Reserved for high-performance file. (It is come from "contiguous file" (S_IFCTG) of Masscomp?)
75
77
  }
76
78
 
79
# Supported hash algorithms for the tar-tvf --hash option.
# Keys are the algorithm names given on the command line; values are the
# names of the corresponding classes looked up under Digest.
# Frozen because it is a read-only lookup table.
Tb::Cmd::TAR_HASH_ALGORITHMS = {
  'md5' => 'MD5',
  'sha1' => 'SHA1',
  'sha256' => 'SHA256',
  'sha384' => 'SHA384',
  'sha512' => 'SHA512',
}.freeze
86
+
77
87
  def (Tb::Cmd).tar_tvf_parse_seconds_from_epoch(val)
78
88
  if /\./ =~ val
79
89
  num = ($` + $').to_i
@@ -196,6 +206,41 @@ class Tb::Cmd::TarReader
196
206
  end
197
207
  @offset += size
198
208
  end
209
+
210
# Reads +blocklength+ bytes of content from @input and computes digests of
# the first +filesize+ bytes of it.
#
# blocklength:: number of bytes to consume from the archive
#               (presumably the content size rounded up to the tar block
#               size; confirm against the caller).
# filesize::    number of leading bytes that are fed to the digests; the
#               remaining bytes are read but not hashed.
# alg_list::    algorithm names; each must be a key of
#               Tb::Cmd::TAR_HASH_ALGORITHMS.
#
# Returns a hash which maps each algorithm name to its hex digest string.
# Raises KeyError for an algorithm name missing from
# Tb::Cmd::TAR_HASH_ALGORITHMS, and Tb::Cmd::TarFormatError when the input
# ends before +blocklength+ bytes could be read.
# Advances @offset by +blocklength+ on success.
def calculate_hash(blocklength, filesize, alg_list)
  digests = {}
  alg_list.each {|alg|
    digest_class = Digest.const_get(Tb::Cmd::TAR_HASH_ALGORITHMS.fetch(alg))
    digests[alg] = digest_class.new
  }
  rest = blocklength
  # The content is consumed in chunks of at most 4096 bytes so that a large
  # member is never held in memory as a whole.
  while 0 < rest
    s = rest < 4096 ? rest : 4096
    ret = @input.read(s)
    if !ret || ret.length != s
      # This method has no "kind" argument; it is only used for file content.
      warn "premature end of tar archive content (file content)"
      raise Tb::Cmd::TarFormatError
    end
    if filesize < s
      # Only the remaining file bytes are hashed.  The length is clamped to
      # zero because String#[] returns nil for a negative length.
      ret = ret[0, [filesize, 0].max]
    end
    digests.each_value {|d| d.update ret }
    filesize -= s
    rest -= s
  end
  @offset += blocklength
  result = {}
  digests.each {|alg, d|
    result[alg] = d.hexdigest
  }
  result
end
199
244
  end
200
245
 
201
246
  def (Tb::Cmd).tar_tvf_read_end_of_archive_indicator(reader)
@@ -291,7 +336,13 @@ def (Tb::Cmd).tar_tvf_each(f)
291
336
  when :link, :symlink, :directory, :character_special, :block_special, :fifo
292
337
  # xxx: hardlink may have contents for posix archive.
293
338
  else
294
- reader.skip(content_blocklength, 'file content')
339
+ if Tb::Cmd.opt_tar_tvf_hash.empty?
340
+ reader.skip(content_blocklength, 'file content')
341
+ else
342
+ reader.calculate_hash(content_blocklength, h[:size], Tb::Cmd.opt_tar_tvf_hash).each {|alg, result|
343
+ h[alg] = result
344
+ }
345
+ end
295
346
  end
296
347
  h[:size_in_tar] = reader.offset - offset
297
348
  yield h
@@ -405,6 +456,10 @@ end
405
456
  def (Tb::Cmd).main_tar_tvf(argv)
406
457
  op_tar_tvf.parse!(argv)
407
458
  exit_if_help('tar-tvf')
459
+ if Tb::Cmd.opt_tar_tvf_hash.any? {|alg| !Tb::Cmd::TAR_HASH_ALGORITHMS[alg] }
460
+ STDERR.puts "Unexpected hash algorithm: #{Tb::Cmd.opt_tar_tvf_hash.reject {|alg| Tb::Cmd::TAR_HASH_ALGORITHMS[alg] }.join(",")}"
461
+ exit false
462
+ end
408
463
  argv = ['-'] if argv.empty?
409
464
  er = Tb::Enumerator.new {|y|
410
465
  if Tb::Cmd.opt_tar_tvf_l == 0
@@ -412,6 +467,7 @@ def (Tb::Cmd).main_tar_tvf(argv)
412
467
  else
413
468
  header = Tb::Cmd::TAR_CSV_LONG_HEADER
414
469
  end
470
+ header += Tb::Cmd.opt_tar_tvf_hash
415
471
  y.set_header header
416
472
  argv.each {|filename|
417
473
  tar_tvf_open_with(filename) {|f|
@@ -436,6 +492,7 @@ def (Tb::Cmd).main_tar_tvf(argv)
436
492
  formatted["tar_typeflag"] = h[:typeflag]
437
493
  formatted["tar_magic"] = h[:magic]
438
494
  formatted["tar_version"] = h[:version]
495
+ Tb::Cmd.opt_tar_tvf_hash.each {|alg| formatted[alg] = h[alg] }
439
496
  y.yield Hash[header.map {|f2| [f2, formatted[f2]] }]
440
497
  }
441
498
  }
@@ -32,6 +32,7 @@ Tb::Cmd.default_option[:opt_unmelt_recnum] = nil
32
32
  Tb::Cmd.default_option[:opt_unmelt_keys] = []
33
33
  Tb::Cmd.default_option[:opt_unmelt_variable_field] = 'variable'
34
34
  Tb::Cmd.default_option[:opt_unmelt_value_field] = 'value'
35
+ Tb::Cmd.default_option[:opt_unmelt_missing_value] = nil
35
36
 
36
37
  def (Tb::Cmd).op_unmelt
37
38
  op = OptionParser.new
@@ -51,6 +52,9 @@ def (Tb::Cmd).op_unmelt
51
52
  op.def_option('--value-field FIELD', 'value field. (default: value)') {|field|
52
53
  Tb::Cmd.opt_unmelt_value_field = field
53
54
  }
55
+ op.def_option('--missing-value FIELD', 'used for missing values. (default: not specified)') {|value|
56
+ Tb::Cmd.opt_unmelt_missing_value = value
57
+ }
54
58
  op
55
59
  end
56
60
 
@@ -69,6 +73,7 @@ def (Tb::Cmd).main_unmelt(argv)
69
73
  end
70
74
  key_fields += Tb::Cmd.opt_unmelt_keys
71
75
  end
76
+ melt_fields_hash = {}
72
77
  er = Tb::Enumerator.new {|y|
73
78
  creader.chunk {|pairs|
74
79
  keys = {}
@@ -92,6 +97,7 @@ def (Tb::Cmd).main_unmelt(argv)
92
97
  pairs_ary.each {|pairs|
93
98
  var = pairs[Tb::Cmd.opt_unmelt_variable_field]
94
99
  val = pairs[Tb::Cmd.opt_unmelt_value_field]
100
+ melt_fields_hash[var] = true
95
101
  if rec.has_key? var
96
102
  y.yield rec
97
103
  rec = keys.dup
@@ -101,6 +107,20 @@ def (Tb::Cmd).main_unmelt(argv)
101
107
  y.yield rec
102
108
  }
103
109
  }
104
- output_tbenum(er)
110
+ if !Tb::Cmd.opt_unmelt_missing_value
111
+ er2 = er
112
+ else
113
+ er2 = Tb::Enumerator.new {|y|
114
+ er.to_fileenumerator.with_header {|header|
115
+ y.set_header header
116
+ }.each {|pairs|
117
+ melt_fields_hash.each_key {|f|
118
+ pairs[f] ||= Tb::Cmd.opt_unmelt_missing_value
119
+ }
120
+ y.yield pairs
121
+ }
122
+ }
123
+ end
124
+ output_tbenum(er2)
105
125
  end
106
126
 
@@ -32,6 +32,7 @@ require 'pathname'
32
32
  require 'etc'
33
33
  require 'time'
34
34
  require 'enumerator'
35
+ require 'digest'
35
36
  require 'tb/pager'
36
37
  require 'tb/cmdutil'
37
38
  require 'tb/cmd_help'
@@ -278,4 +278,30 @@ class TestTbCmdTarTvf < Test::Unit::TestCase
278
278
  }
279
279
  end
280
280
 
281
# Archives a 3-byte file and checks, for every supported algorithm, that
# tar-tvf --hash=ALG writes two lines of CSV and that a line ends with the
# hex digest of the file content.
def test_hash_short
  content = 'bar'
  open('foo', 'w') {|io| io.print content }
  assert(system('tar cf foo.tar foo'))
  %w[MD5 SHA1 SHA256 SHA384 SHA512].each {|digest_class_name|
    algorithm = digest_class_name.downcase
    csv_path = 'o.csv'
    Tb::Cmd.main_tar_tvf(['-o', csv_path, "--hash=#{algorithm}", 'foo.tar'])
    output = File.read(csv_path)
    assert_equal(2, output.count("\n"), "hash algorithm: #{algorithm}")
    expected_digest = Digest.const_get(digest_class_name).hexdigest(content)
    assert_match(/,#{Regexp.escape expected_digest}$/, output, "hash algorithm: #{algorithm}")
  }
end
293
+
294
# Same check as the short variant, but with 16000 bytes of content so that
# the content is longer than a single 4096-byte read.
def test_hash_long
  content = 'bx' * 8000
  open('foo', 'w') {|io| io.print content }
  assert(system('tar cf foo.tar foo'))
  %w[MD5 SHA1 SHA256 SHA384 SHA512].each {|digest_class_name|
    algorithm = digest_class_name.downcase
    csv_path = 'o.csv'
    Tb::Cmd.main_tar_tvf(['-o', csv_path, "--hash=#{algorithm}", 'foo.tar'])
    output = File.read(csv_path)
    assert_equal(2, output.count("\n"), "hash algorithm: #{algorithm}")
    expected_digest = Digest.const_get(digest_class_name).hexdigest(content)
    assert_match(/,#{Regexp.escape expected_digest}$/, output, "hash algorithm: #{algorithm}")
  }
end
306
+
281
307
  end
@@ -154,4 +154,18 @@ class TestTbCmdUnmelt < Test::Unit::TestCase
154
154
  End
155
155
  end
156
156
 
157
# Unmelts a table in which each record lacks one of the variables and
# checks that --missing-value=9 fills the absent cells with 9.
def test_missing_value
  input_path = "i.csv"
  input_csv = <<-"End".gsub(/^[ \t]+/, '')
    a,variable,value
    0,x,2
    1,y,3
  End
  File.open(input_path, "w") {|f| f << input_csv }
  output_path = "o.csv"
  Tb::Cmd.main_unmelt(['-o', output_path, '--missing-value=9', input_path])
  expected_csv = <<-"End".gsub(/^[ \t]+/, '')
    a,x,y
    0,2,9
    1,9,3
  End
  assert_equal(expected_csv, File.read(output_path))
end
170
+
157
171
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tb
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.5'
4
+ version: '0.6'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-29 00:00:00.000000000 Z
12
+ date: 2012-04-29 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: ! 'tb is a manipulation tool for table: CSV, TSV, JSON, etc.
15
15