tb 0.9 → 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/README +13 -11
  3. data/lib/tb.rb +14 -6
  4. data/lib/tb/catreader.rb +2 -2
  5. data/lib/tb/cmd_consecutive.rb +6 -2
  6. data/lib/tb/cmd_crop.rb +22 -3
  7. data/lib/tb/cmd_cross.rb +24 -0
  8. data/lib/tb/cmd_cut.rb +20 -10
  9. data/lib/tb/cmd_git.rb +20 -7
  10. data/lib/tb/cmd_group.rb +32 -0
  11. data/lib/tb/cmd_gsub.rb +21 -0
  12. data/lib/tb/cmd_join.rb +28 -0
  13. data/lib/tb/cmd_ls.rb +9 -0
  14. data/lib/tb/cmd_melt.rb +15 -0
  15. data/lib/tb/cmd_mheader.rb +15 -0
  16. data/lib/tb/cmd_nest.rb +27 -6
  17. data/lib/tb/cmd_newfield.rb +19 -2
  18. data/lib/tb/cmd_rename.rb +20 -0
  19. data/lib/tb/{cmd_grep.rb → cmd_search.rb} +37 -23
  20. data/lib/tb/cmd_shape.rb +69 -25
  21. data/lib/tb/cmd_sort.rb +20 -0
  22. data/lib/tb/cmd_tar.rb +38 -0
  23. data/lib/tb/cmd_to_json.rb +2 -2
  24. data/lib/tb/cmd_to_ltsv.rb +3 -3
  25. data/lib/tb/cmd_to_pnm.rb +3 -3
  26. data/lib/tb/cmd_to_tsv.rb +3 -3
  27. data/lib/tb/cmd_to_yaml.rb +3 -3
  28. data/lib/tb/cmd_unmelt.rb +15 -0
  29. data/lib/tb/cmd_unnest.rb +31 -7
  30. data/lib/tb/cmdmain.rb +2 -0
  31. data/lib/tb/cmdtop.rb +1 -1
  32. data/lib/tb/cmdutil.rb +9 -62
  33. data/lib/tb/csv.rb +21 -79
  34. data/lib/tb/enumerable.rb +42 -68
  35. data/lib/tb/enumerator.rb +15 -7
  36. data/lib/tb/{fieldset.rb → hashreader.rb} +37 -56
  37. data/lib/tb/hashwriter.rb +54 -0
  38. data/lib/tb/headerreader.rb +108 -0
  39. data/lib/tb/headerwriter.rb +116 -0
  40. data/lib/tb/json.rb +17 -15
  41. data/lib/tb/ltsv.rb +35 -96
  42. data/lib/tb/ndjson.rb +63 -0
  43. data/lib/tb/numericreader.rb +66 -0
  44. data/lib/tb/numericwriter.rb +61 -0
  45. data/lib/tb/pnm.rb +206 -200
  46. data/lib/tb/ropen.rb +54 -59
  47. data/lib/tb/tsv.rb +39 -71
  48. data/sample/excel2csv +24 -25
  49. data/sample/poi-xls2csv.rb +13 -14
  50. data/tb.gemspec +154 -0
  51. data/test/test_cmd_cat.rb +28 -6
  52. data/test/test_cmd_consecutive.rb +8 -3
  53. data/test/test_cmd_cut.rb +14 -4
  54. data/test/test_cmd_git_log.rb +50 -50
  55. data/test/test_cmd_grep.rb +6 -6
  56. data/test/test_cmd_gsub.rb +7 -2
  57. data/test/test_cmd_ls.rb +70 -62
  58. data/test/test_cmd_shape.rb +43 -6
  59. data/test/test_cmd_svn_log.rb +26 -27
  60. data/test/test_cmd_to_csv.rb +10 -5
  61. data/test/test_cmd_to_json.rb +16 -0
  62. data/test/test_cmd_to_ltsv.rb +2 -2
  63. data/test/test_cmd_to_pp.rb +7 -2
  64. data/test/test_csv.rb +74 -62
  65. data/test/test_ex_enumerable.rb +0 -1
  66. data/test/test_fileenumerator.rb +3 -3
  67. data/test/test_headercsv.rb +43 -0
  68. data/test/test_json.rb +2 -2
  69. data/test/test_ltsv.rb +22 -17
  70. data/test/test_ndjson.rb +62 -0
  71. data/test/test_numericcsv.rb +36 -0
  72. data/test/test_pnm.rb +69 -70
  73. data/test/test_reader.rb +27 -124
  74. data/test/test_tbenum.rb +18 -18
  75. data/test/test_tsv.rb +21 -32
  76. data/test/util_tbtest.rb +12 -0
  77. metadata +41 -19
  78. data/lib/tb/basic.rb +0 -1070
  79. data/lib/tb/reader.rb +0 -106
  80. data/lib/tb/record.rb +0 -158
  81. data/test/test_basic.rb +0 -403
  82. data/test/test_fieldset.rb +0 -42
  83. data/test/test_record.rb +0 -61
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1b5601329ad0755149e198c2139d3da0707fb0af
4
- data.tar.gz: 03997adcba0d54d8745235e6534d899ba23100ec
3
+ metadata.gz: 9aa78baf63b93601d7d70cba5e9a69ecda6aceff
4
+ data.tar.gz: df5877535b6a51efc81c27b3903f99e9d762e997
5
5
  SHA512:
6
- metadata.gz: 59b8dea2fba883302eb50b2253c74c0e963ad408cf9031a908ee6957b91f0562f1c6ee93249e68ca13ed49d4144ddce863cfd381cba2e9c693801616791593e7
7
- data.tar.gz: ef49a9b3f97c8490303bf50812b590b38c56fa6c8b045bf136f032debd5a8070cbf10e28e092699578a3aac442c50b7128d8ed37f4d8e24ecee5a7b74cc72a93
6
+ metadata.gz: d460033685e07e30d9b3e2de12eeacfcb5c0c451655e9ad75cce094576ac34188a5badd93d56316f663f2096ffa41826d1f2e5f5a376be06047093b794ffbd2f
7
+ data.tar.gz: 727ec54c18343cba5a725952789202e220370478e430cfe4c366df152e9908bbfdd475126feef7047fca1d939fddc1b31c968eb11728c0953ae7f037f9d3ebd8
data/README CHANGED
@@ -1,12 +1,14 @@
1
- = tb - manipulation tool for table: CSV, TSV, JSON, LTSV, etc.
1
+ = tb - manipulation tool for tables
2
2
 
3
3
  tb provides a command and a library for manipulating tables:
4
- Unix filter like operations (grep, sort, cat, cut, ls, etc.),
4
+ Unix filter like operations (sort, cat, cut, ls, etc.),
5
5
  SQL like operations (join, group, etc.),
6
- other table operations (gsub, rename, cross, melt, unmelt, etc.),
6
+ other table operations (search, gsub, rename, cross, melt, unmelt, etc.),
7
7
  information extractions (git, svn, tar),
8
8
  and more.
9
9
 
10
+ tb supports various table formats: CSV, TSV, JSON, NDJSON, LTSV, etc.
11
+
10
12
  == Example
11
13
 
12
14
  There is a CSV file for programming languages and their birth year in
@@ -56,18 +58,18 @@ Also, the comparison method used in tb is smart to sort numbers correctly.
56
58
  BASIC,1964
57
59
  PL/I,1964
58
60
 
59
- "grep" subcommand search CSV file.
61
+ "search" subcommand searches a CSV file.
60
62
 
61
- % tb grep R sample/langs.csv
63
+ % tb search R sample/langs.csv
62
64
  language,year
63
65
  FORTRAN,1955
64
66
  Ruby,1993
65
67
 
66
- "grep" subcommand takes -f with field name and -v to show non-matching rows.
68
+ "search" subcommand takes -f with field name and -v to show non-matching rows.
67
69
  You don't need to care about field separators (comma) to match.
68
70
  The following example searches for languages whose name contains a non-alphabetic character.
69
71
 
70
- % tb grep -vf language '\A[A-Za-z]*\z' sample/langs.csv |cat
72
+ % tb search -vf language '\A[A-Za-z]*\z' sample/langs.csv
71
73
  language,year
72
74
  ALGOL 58,1958
73
75
  PL/I,1964
@@ -78,10 +80,10 @@ Following example searches languages which name contains a non-alphabet characte
78
80
  C#,2001
79
81
  F#,2002
80
82
 
81
- "grep" subcommand can take Ruby expression, instead of a regexp.
83
+ "search" subcommand can take Ruby expression, instead of a regexp.
82
84
  The variable, "_", contains a hash which represents a record.
83
85
 
84
- % tb grep --ruby '(1990..1999).include?(_["year"].to_i)' sample/langs.csv
86
+ % tb search --ruby '(1990..1999).include?(_["year"].to_i)' sample/langs.csv
85
87
  language,year
86
88
  Haskell,1990
87
89
  Python,1991
@@ -138,7 +140,7 @@ There are more subcommands.
138
140
  tb to-json [OPTS] [TABLE]
139
141
  tb to-yaml [OPTS] [TABLE]
140
142
  tb to-pp [OPTS] [TABLE]
141
- tb grep [OPTS] REGEXP [TABLE ...]
143
+ tb search [OPTS] REGEXP [TABLE ...]
142
144
  tb gsub [OPTS] REGEXP STRING [TABLE ...]
143
145
  tb sort [OPTS] [TABLE]
144
146
  tb cut [OPTS] FIELD,... [TABLE]
@@ -172,7 +174,7 @@ tb help -s shows one line summary of the subcommands.
172
174
  to-json : Convert a table to JSON (JavaScript Object Notation).
173
175
  to-yaml : Convert a table to YAML (YAML Ain't a Markup Language).
174
176
  to-pp : Convert a table to pretty printed format.
175
- grep : Search rows using regexp or ruby expression.
177
+ search : Search rows using regexp or ruby expression.
176
178
  gsub : Substitute cells.
177
179
  sort : Sort rows.
178
180
  cut : Select columns.
data/lib/tb.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # lib/tb.rb - entry file for table library
2
2
  #
3
- # Copyright (C) 2010-2013 Tanaka Akira <akr@fsij.org>
3
+ # Copyright (C) 2010-2014 Tanaka Akira <akr@fsij.org>
4
4
  #
5
5
  # Redistribution and use in source and binary forms, with or without
6
6
  # modification, are permitted provided that the following conditions
@@ -30,7 +30,7 @@
30
30
 
31
31
  require 'tempfile'
32
32
 
33
- class Tb
33
+ module Tb
34
34
  end
35
35
 
36
36
  require 'pp'
@@ -38,17 +38,25 @@ require 'tb/enumerable'
38
38
  require 'tb/enumerator'
39
39
  require 'tb/func'
40
40
  require 'tb/zipper'
41
- require 'tb/basic'
42
- require 'tb/record'
41
+
42
+ require 'tb/headerreader'
43
+ require 'tb/headerwriter'
44
+
45
+ require 'tb/numericreader'
46
+ require 'tb/numericwriter'
47
+
48
+ require 'tb/hashreader'
49
+ require 'tb/hashwriter'
50
+
43
51
  require 'tb/csv'
44
52
  require 'tb/tsv'
45
53
  require 'tb/ltsv'
46
54
  require 'tb/pnm'
47
55
  require 'tb/json'
48
- require 'tb/reader'
56
+ require 'tb/ndjson'
57
+
49
58
  require 'tb/ropen'
50
59
  require 'tb/catreader'
51
- require 'tb/fieldset'
52
60
  require 'tb/search'
53
61
  require 'tb/ex_enumerable'
54
62
  require 'tb/ex_enumerator'
@@ -1,6 +1,6 @@
1
1
  # lib/tb/catreader.rb - Tb::CatReader class
2
2
  #
3
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
3
+ # Copyright (C) 2011-2014 Tanaka Akira <akr@fsij.org>
4
4
  #
5
5
  # Redistribution and use in source and binary forms, with or without
6
6
  # modification, are permitted provided that the following conditions
@@ -32,7 +32,7 @@ module Tb::CatReader
32
32
  def self.open(filenames, numeric=false, with_filename=false)
33
33
  readers = []
34
34
  filenames.each {|f|
35
- r = Tb.open_reader(f, numeric ? {:numeric=>true} : {})
35
+ r = Tb.open_reader(f, numeric)
36
36
  if with_filename
37
37
  r = r.newfield("filename") { f }
38
38
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2011-2014 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions
@@ -74,7 +74,11 @@ def (Tb::Cmd).main_consecutive(argv)
74
74
  ps = buf[i]
75
75
  next if !ps.has_key?(f)
76
76
  v = ps[f]
77
- pairs2["#{f}_#{i+1}"] = v
77
+ if Tb::Cmd.opt_N
78
+ pairs2[((f.to_i-1) * Tb::Cmd.opt_consecutive_n + i + 1).to_s] = v
79
+ else
80
+ pairs2["#{f}_#{i+1}"] = v
81
+ end
78
82
  }
79
83
  }
80
84
  empty = false
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2011-2014 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions
@@ -35,10 +35,26 @@ def (Tb::Cmd).op_crop
35
35
  op.banner = "Usage: tb crop [OPTS] [TABLE ...]\n" +
36
36
  "Extract rectangle in a table."
37
37
  define_common_option(op, "ho", "--no-pager")
38
- op.def_option('-r RANGE', 'range. i.e. "2,1-4,3", "B1:D3"') {|arg| Tb::Cmd.opt_crop_range = arg }
38
+ op.def_option('-r RANGE', 'range. i.e. "R2C1:R4C3", "B1:D3"') {|arg| Tb::Cmd.opt_crop_range = arg }
39
39
  op
40
40
  end
41
41
 
42
+ Tb::Cmd.def_vhelp('crop', <<'End')
43
+ Example:
44
+
45
+ % cat tst.csv
46
+ 0,1,2,4
47
+ 5,6,7,8
48
+ 9,a,b,c
49
+ d,e,f,g
50
+ h,i,j,k
51
+ % tb crop -r R2C2:R4C3 tst.csv
52
+ 6,7
53
+ a,b
54
+ e,f
55
+ End
56
+
57
+
42
58
  def (Tb::Cmd).decode_a1_addressing_col(str)
43
59
  (26**str.length-1)/25+str.tr("A-Z", "0-9A-P").to_i(26)
44
60
  end
@@ -79,7 +95,10 @@ def (Tb::Cmd).main_crop(argv)
79
95
  f = f.to_i
80
96
  f < range_col1 || range_col2 < f
81
97
  }
82
- y.yield pairs2
98
+ pairs2 = pairs2.map {|f, v|
99
+ [(f.to_i - range_col1 + 1).to_s, v]
100
+ }
101
+ y.yield Hash[pairs2]
83
102
  end
84
103
  rownum += 1
85
104
  }
@@ -42,6 +42,30 @@ def (Tb::Cmd).op_cross
42
42
  op
43
43
  end
44
44
 
45
+ Tb::Cmd.def_vhelp('cross', <<'End')
46
+ Example:
47
+
48
+ % cat tst.csv
49
+ a,b,c
50
+ A,X,2
51
+ A,Y,3
52
+ B,Y,4
53
+ % tb cross a b tst.csv
54
+ b,X,Y
55
+ a,count,count
56
+ A,1,1
57
+ B,,1
58
+ % tb cross -c a b tst.csv
59
+ a,X,Y
60
+ A,1,1
61
+ B,,1
62
+ % tb cross a b -a 'avg(c)' tst.csv
63
+ b,X,Y
64
+ a,avg(c),avg(c)
65
+ A,2.0,3.0
66
+ B,,4.0
67
+ End
68
+
45
69
  def (Tb::Cmd).main_cross(argv)
46
70
  op_cross.parse!(argv)
47
71
  exit_if_help('cross')
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2011-2014 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions
@@ -39,6 +39,23 @@ def (Tb::Cmd).op_cut
39
39
  op
40
40
  end
41
41
 
42
+ Tb::Cmd.def_vhelp('cut', <<'End')
43
+ Example:
44
+
45
+ % cat tst.csv
46
+ a,b,c
47
+ 0,1,2
48
+ 4,5,6
49
+ % tb cut a,c tst.csv
50
+ a,c
51
+ 0,2
52
+ 4,6
53
+ % tb cut -v a tst.csv
54
+ b,c
55
+ 1,2
56
+ 5,6
57
+ End
58
+
42
59
  def (Tb::Cmd).main_cut(argv)
43
60
  op_cut.parse!(argv)
44
61
  exit_if_help('cut')
@@ -59,15 +76,8 @@ def (Tb::Cmd).main_cut(argv)
59
76
  output_tbenum(er)
60
77
  else
61
78
  er = Tb::Enumerator.new {|y|
62
- tblreader.with_header {|header0|
63
- if header0
64
- fieldset = Tb::FieldSet.new(*header0)
65
- fs.each {|f|
66
- fieldset.index_from_field_ex(f)
67
- }
68
- end
69
- y.set_header fs
70
- }.each {|pairs|
79
+ y.set_header fs
80
+ tblreader.each {|pairs|
71
81
  y.yield pairs.reject {|k, v| !fs.include?(k) }
72
82
  }
73
83
  }
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2011-2014 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions
@@ -43,6 +43,20 @@ def (Tb::Cmd).op_git
43
43
  op
44
44
  end
45
45
 
46
+ Tb::Cmd.def_vhelp('git', <<'End')
47
+ Example:
48
+
49
+ % tb git | tb cut commit
50
+ commit
51
+ dedc77387459ea90b5f508be03cce9ecf0d990b5
52
+ bebf22d1d97921fb86f816ea48235948fa4c73b3
53
+ 38d61f96a8b33a6c5d527e564dc78b4a47b3eff0
54
+ 48c10d3b1b0609d389ce1d9203e57f63c1408703
55
+ d002c43e0a85ec7051f667140261d6b0e74e9039
56
+ 9292cade7c71d0b8805898621a53ba7a4713d029
57
+ ...
58
+ End
59
+
46
60
  Tb::Cmd::GIT_LOG_FORMAT_SPEC = [
47
61
  %w[commit %H],
48
62
  %w[tree %T],
@@ -150,12 +164,11 @@ def (Tb::Cmd).git_parse_commit(commit_info, files)
150
164
  warn "unexpected git output (raw/numstat): #{file_line.inspect}"
151
165
  end
152
166
  }
153
- Tb.csv_stream_output(files_csv="") {|gen|
154
- gen << %w[mode1 mode2 hash1 hash2 add del status filename]
155
- files_raw.each {|filename, (mode1, mode2, hash1, hash2, status)|
156
- add, del = files_numstat[filename]
157
- gen << [mode1, mode2, hash1, hash2, add, del, status, filename]
158
- }
167
+ files_csv = ""
168
+ files_csv << %w[mode1 mode2 hash1 hash2 add del status filename].to_csv
169
+ files_raw.each {|filename, (mode1, mode2, hash1, hash2, status)|
170
+ add, del = files_numstat[filename]
171
+ files_csv << [mode1, mode2, hash1, hash2, add, del, status, filename].to_csv
159
172
  }
160
173
  h = {}
161
174
  commit_info.each {|s|
@@ -41,6 +41,38 @@ def (Tb::Cmd).op_group
41
41
  op
42
42
  end
43
43
 
44
+ Tb::Cmd.def_vhelp('group', <<'End')
45
+ Example:
46
+
47
+ % cat tst.csv
48
+ a,b,c
49
+ A,X,2
50
+ A,Y,3
51
+ B,Y,4
52
+ % tb group a tst.csv
53
+ a
54
+ A
55
+ B
56
+ % tb group a -a count tst.csv
57
+ a,count
58
+ A,2
59
+ B,1
60
+ % tb group a -a 'avg(c)' tst.csv
61
+ a,avg(c)
62
+ A,2.5
63
+ B,4.0
64
+ % tb group a,b tst.csv
65
+ a,b
66
+ A,X
67
+ A,Y
68
+ B,Y
69
+ % tb group a,b -a count tst.csv
70
+ a,b,count
71
+ A,X,1
72
+ A,Y,1
73
+ B,Y,1
74
+ End
75
+
44
76
  def (Tb::Cmd).main_group(argv)
45
77
  op_group.parse!(argv)
46
78
  exit_if_help('group')
@@ -41,6 +41,27 @@ def (Tb::Cmd).op_gsub
41
41
  op
42
42
  end
43
43
 
44
+ Tb::Cmd.def_vhelp('gsub', <<'End')
45
+ Example:
46
+
47
+ % cat tst.csv
48
+ foo,bar
49
+ baz,qux
50
+ hoge,moga
51
+ % tb gsub o X tst.csv
52
+ foo,bar
53
+ baz,qux
54
+ hXge,mXga
55
+ % tb gsub -f foo o X tst.csv
56
+ foo,bar
57
+ baz,qux
58
+ hXge,moga
59
+ % tb gsub '[aeiou]' '{\&}' tst.csv
60
+ foo,bar
61
+ b{a}z,q{u}x
62
+ h{o}g{e},m{o}g{a}
63
+ End
64
+
44
65
  def (Tb::Cmd).main_gsub(argv)
45
66
  op_gsub.parse!(argv)
46
67
  exit_if_help('gsub')
@@ -59,6 +59,34 @@ def (Tb::Cmd).op_join
59
59
  op
60
60
  end
61
61
 
62
+ Tb::Cmd.def_vhelp('join', <<'End')
63
+ Example:
64
+
65
+ % cat tst1.csv
66
+ name,length
67
+ A,20
68
+ B,30
69
+ C,25
70
+ % cat tst2.csv
71
+ name,weight
72
+ A,5
73
+ B,8
74
+ % tb join tst1.csv tst2.csv
75
+ name,length,weight
76
+ A,20,5
77
+ B,30,8
78
+ % tb join --left tst1.csv tst2.csv
79
+ name,length,weight
80
+ A,20,5
81
+ B,30,8
82
+ C,25,
83
+ % tb join --left --outer-missing=zzz tst1.csv tst2.csv
84
+ name,length,weight
85
+ A,20,5
86
+ B,30,8
87
+ C,25,zzz
88
+ End
89
+
62
90
  def (Tb::Cmd).main_join(argv)
63
91
  op_join.parse!(argv)
64
92
  exit_if_help('join')
@@ -45,6 +45,15 @@ def (Tb::Cmd).op_ls
45
45
  op
46
46
  end
47
47
 
48
+ Tb::Cmd.def_vhelp('ls', <<'End')
49
+ Example:
50
+
51
+ % tb ls -l lib
52
+ filemode,nlink,user,group,size,mtime,filename,symlink
53
+ drwxr-xr-x,2,akr,akr,4096,2014-10-26T09:15:35+09:00,lib/tb,
54
+ -rw-r--r--,1,akr,akr,2113,2014-10-25T18:40:05+09:00,lib/tb.rb,
55
+ End
56
+
48
57
  def (Tb::Cmd).main_ls(argv)
49
58
  op_ls.parse!(argv)
50
59
  exit_if_help('ls')