tb 0.9 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/README +13 -11
  3. data/lib/tb.rb +14 -6
  4. data/lib/tb/catreader.rb +2 -2
  5. data/lib/tb/cmd_consecutive.rb +6 -2
  6. data/lib/tb/cmd_crop.rb +22 -3
  7. data/lib/tb/cmd_cross.rb +24 -0
  8. data/lib/tb/cmd_cut.rb +20 -10
  9. data/lib/tb/cmd_git.rb +20 -7
  10. data/lib/tb/cmd_group.rb +32 -0
  11. data/lib/tb/cmd_gsub.rb +21 -0
  12. data/lib/tb/cmd_join.rb +28 -0
  13. data/lib/tb/cmd_ls.rb +9 -0
  14. data/lib/tb/cmd_melt.rb +15 -0
  15. data/lib/tb/cmd_mheader.rb +15 -0
  16. data/lib/tb/cmd_nest.rb +27 -6
  17. data/lib/tb/cmd_newfield.rb +19 -2
  18. data/lib/tb/cmd_rename.rb +20 -0
  19. data/lib/tb/{cmd_grep.rb → cmd_search.rb} +37 -23
  20. data/lib/tb/cmd_shape.rb +69 -25
  21. data/lib/tb/cmd_sort.rb +20 -0
  22. data/lib/tb/cmd_tar.rb +38 -0
  23. data/lib/tb/cmd_to_json.rb +2 -2
  24. data/lib/tb/cmd_to_ltsv.rb +3 -3
  25. data/lib/tb/cmd_to_pnm.rb +3 -3
  26. data/lib/tb/cmd_to_tsv.rb +3 -3
  27. data/lib/tb/cmd_to_yaml.rb +3 -3
  28. data/lib/tb/cmd_unmelt.rb +15 -0
  29. data/lib/tb/cmd_unnest.rb +31 -7
  30. data/lib/tb/cmdmain.rb +2 -0
  31. data/lib/tb/cmdtop.rb +1 -1
  32. data/lib/tb/cmdutil.rb +9 -62
  33. data/lib/tb/csv.rb +21 -79
  34. data/lib/tb/enumerable.rb +42 -68
  35. data/lib/tb/enumerator.rb +15 -7
  36. data/lib/tb/{fieldset.rb → hashreader.rb} +37 -56
  37. data/lib/tb/hashwriter.rb +54 -0
  38. data/lib/tb/headerreader.rb +108 -0
  39. data/lib/tb/headerwriter.rb +116 -0
  40. data/lib/tb/json.rb +17 -15
  41. data/lib/tb/ltsv.rb +35 -96
  42. data/lib/tb/ndjson.rb +63 -0
  43. data/lib/tb/numericreader.rb +66 -0
  44. data/lib/tb/numericwriter.rb +61 -0
  45. data/lib/tb/pnm.rb +206 -200
  46. data/lib/tb/ropen.rb +54 -59
  47. data/lib/tb/tsv.rb +39 -71
  48. data/sample/excel2csv +24 -25
  49. data/sample/poi-xls2csv.rb +13 -14
  50. data/tb.gemspec +154 -0
  51. data/test/test_cmd_cat.rb +28 -6
  52. data/test/test_cmd_consecutive.rb +8 -3
  53. data/test/test_cmd_cut.rb +14 -4
  54. data/test/test_cmd_git_log.rb +50 -50
  55. data/test/test_cmd_grep.rb +6 -6
  56. data/test/test_cmd_gsub.rb +7 -2
  57. data/test/test_cmd_ls.rb +70 -62
  58. data/test/test_cmd_shape.rb +43 -6
  59. data/test/test_cmd_svn_log.rb +26 -27
  60. data/test/test_cmd_to_csv.rb +10 -5
  61. data/test/test_cmd_to_json.rb +16 -0
  62. data/test/test_cmd_to_ltsv.rb +2 -2
  63. data/test/test_cmd_to_pp.rb +7 -2
  64. data/test/test_csv.rb +74 -62
  65. data/test/test_ex_enumerable.rb +0 -1
  66. data/test/test_fileenumerator.rb +3 -3
  67. data/test/test_headercsv.rb +43 -0
  68. data/test/test_json.rb +2 -2
  69. data/test/test_ltsv.rb +22 -17
  70. data/test/test_ndjson.rb +62 -0
  71. data/test/test_numericcsv.rb +36 -0
  72. data/test/test_pnm.rb +69 -70
  73. data/test/test_reader.rb +27 -124
  74. data/test/test_tbenum.rb +18 -18
  75. data/test/test_tsv.rb +21 -32
  76. data/test/util_tbtest.rb +12 -0
  77. metadata +41 -19
  78. data/lib/tb/basic.rb +0 -1070
  79. data/lib/tb/reader.rb +0 -106
  80. data/lib/tb/record.rb +0 -158
  81. data/test/test_basic.rb +0 -403
  82. data/test/test_fieldset.rb +0 -42
  83. data/test/test_record.rb +0 -61
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1b5601329ad0755149e198c2139d3da0707fb0af
4
- data.tar.gz: 03997adcba0d54d8745235e6534d899ba23100ec
3
+ metadata.gz: 9aa78baf63b93601d7d70cba5e9a69ecda6aceff
4
+ data.tar.gz: df5877535b6a51efc81c27b3903f99e9d762e997
5
5
  SHA512:
6
- metadata.gz: 59b8dea2fba883302eb50b2253c74c0e963ad408cf9031a908ee6957b91f0562f1c6ee93249e68ca13ed49d4144ddce863cfd381cba2e9c693801616791593e7
7
- data.tar.gz: ef49a9b3f97c8490303bf50812b590b38c56fa6c8b045bf136f032debd5a8070cbf10e28e092699578a3aac442c50b7128d8ed37f4d8e24ecee5a7b74cc72a93
6
+ metadata.gz: d460033685e07e30d9b3e2de12eeacfcb5c0c451655e9ad75cce094576ac34188a5badd93d56316f663f2096ffa41826d1f2e5f5a376be06047093b794ffbd2f
7
+ data.tar.gz: 727ec54c18343cba5a725952789202e220370478e430cfe4c366df152e9908bbfdd475126feef7047fca1d939fddc1b31c968eb11728c0953ae7f037f9d3ebd8
data/README CHANGED
@@ -1,12 +1,14 @@
1
- = tb - manipulation tool for table: CSV, TSV, JSON, LTSV, etc.
1
+ = tb - manipulation tool for tables
2
2
 
3
3
  tb provides a command and a library for manipulating tables:
4
- Unix filter like operations (grep, sort, cat, cut, ls, etc.),
4
+ Unix filter like operations (sort, cat, cut, ls, etc.),
5
5
  SQL like operations (join, group, etc.),
6
- other table operations (gsub, rename, cross, melt, unmelt, etc.),
6
+ other table operations (search, gsub, rename, cross, melt, unmelt, etc.),
7
7
  information extractions (git, svn, tar),
8
8
  and more.
9
9
 
10
+ tb supports various table formats: CSV, TSV, JSON, NDJSON, LTSV, etc.
11
+
10
12
  == Example
11
13
 
12
14
  There is a CSV file for programming languages and their birth year in
@@ -56,18 +58,18 @@ Also, the comparison method used in tb is smart to sort numbers correctly.
56
58
  BASIC,1964
57
59
  PL/I,1964
58
60
 
59
- "grep" subcommand searches a CSV file.
61
+ "search" subcommand searches a CSV file.
60
62
 
61
- % tb grep R sample/langs.csv
63
+ % tb search R sample/langs.csv
62
64
  language,year
63
65
  FORTRAN,1955
64
66
  Ruby,1993
65
67
 
66
- "grep" subcommand takes -f with field name and -v to show non-matching rows.
68
+ "search" subcommand takes -f with field name and -v to show non-matching rows.
67
69
  You don't need to care about field separators (commas) to match.
68
70
  The following example searches for languages whose name contains a non-alphabetic character.
69
71
 
70
- % tb grep -vf language '\A[A-Za-z]*\z' sample/langs.csv |cat
72
+ % tb search -vf language '\A[A-Za-z]*\z' sample/langs.csv
71
73
  language,year
72
74
  ALGOL 58,1958
73
75
  PL/I,1964
@@ -78,10 +80,10 @@ Following example searches languages which name contains a non-alphabet characte
78
80
  C#,2001
79
81
  F#,2002
80
82
 
81
- "grep" subcommand can take Ruby expression, instead of a regexp.
83
+ "search" subcommand can take Ruby expression, instead of a regexp.
82
84
  The variable, "_", contains a hash which represents a record.
83
85
 
84
- % tb grep --ruby '(1990..1999).include?(_["year"].to_i)' sample/langs.csv
86
+ % tb search --ruby '(1990..1999).include?(_["year"].to_i)' sample/langs.csv
85
87
  language,year
86
88
  Haskell,1990
87
89
  Python,1991
@@ -138,7 +140,7 @@ There are more subcommands.
138
140
  tb to-json [OPTS] [TABLE]
139
141
  tb to-yaml [OPTS] [TABLE]
140
142
  tb to-pp [OPTS] [TABLE]
141
- tb grep [OPTS] REGEXP [TABLE ...]
143
+ tb search [OPTS] REGEXP [TABLE ...]
142
144
  tb gsub [OPTS] REGEXP STRING [TABLE ...]
143
145
  tb sort [OPTS] [TABLE]
144
146
  tb cut [OPTS] FIELD,... [TABLE]
@@ -172,7 +174,7 @@ tb help -s shows one line summary of the subcommands.
172
174
  to-json : Convert a table to JSON (JavaScript Object Notation).
173
175
  to-yaml : Convert a table to YAML (YAML Ain't a Markup Language).
174
176
  to-pp : Convert a table to pretty printed format.
175
- grep : Search rows using regexp or ruby expression.
177
+ search : Search rows using regexp or ruby expression.
176
178
  gsub : Substitute cells.
177
179
  sort : Sort rows.
178
180
  cut : Select columns.
data/lib/tb.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # lib/tb.rb - entry file for table library
2
2
  #
3
- # Copyright (C) 2010-2013 Tanaka Akira <akr@fsij.org>
3
+ # Copyright (C) 2010-2014 Tanaka Akira <akr@fsij.org>
4
4
  #
5
5
  # Redistribution and use in source and binary forms, with or without
6
6
  # modification, are permitted provided that the following conditions
@@ -30,7 +30,7 @@
30
30
 
31
31
  require 'tempfile'
32
32
 
33
- class Tb
33
+ module Tb
34
34
  end
35
35
 
36
36
  require 'pp'
@@ -38,17 +38,25 @@ require 'tb/enumerable'
38
38
  require 'tb/enumerator'
39
39
  require 'tb/func'
40
40
  require 'tb/zipper'
41
- require 'tb/basic'
42
- require 'tb/record'
41
+
42
+ require 'tb/headerreader'
43
+ require 'tb/headerwriter'
44
+
45
+ require 'tb/numericreader'
46
+ require 'tb/numericwriter'
47
+
48
+ require 'tb/hashreader'
49
+ require 'tb/hashwriter'
50
+
43
51
  require 'tb/csv'
44
52
  require 'tb/tsv'
45
53
  require 'tb/ltsv'
46
54
  require 'tb/pnm'
47
55
  require 'tb/json'
48
- require 'tb/reader'
56
+ require 'tb/ndjson'
57
+
49
58
  require 'tb/ropen'
50
59
  require 'tb/catreader'
51
- require 'tb/fieldset'
52
60
  require 'tb/search'
53
61
  require 'tb/ex_enumerable'
54
62
  require 'tb/ex_enumerator'
data/lib/tb/catreader.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # lib/tb/catreader.rb - Tb::CatReader class
2
2
  #
3
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
3
+ # Copyright (C) 2011-2014 Tanaka Akira <akr@fsij.org>
4
4
  #
5
5
  # Redistribution and use in source and binary forms, with or without
6
6
  # modification, are permitted provided that the following conditions
@@ -32,7 +32,7 @@ module Tb::CatReader
32
32
  def self.open(filenames, numeric=false, with_filename=false)
33
33
  readers = []
34
34
  filenames.each {|f|
35
- r = Tb.open_reader(f, numeric ? {:numeric=>true} : {})
35
+ r = Tb.open_reader(f, numeric)
36
36
  if with_filename
37
37
  r = r.newfield("filename") { f }
38
38
  end
data/lib/tb/cmd_consecutive.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2011-2014 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions
@@ -74,7 +74,11 @@ def (Tb::Cmd).main_consecutive(argv)
74
74
  ps = buf[i]
75
75
  next if !ps.has_key?(f)
76
76
  v = ps[f]
77
- pairs2["#{f}_#{i+1}"] = v
77
+ if Tb::Cmd.opt_N
78
+ pairs2[((f.to_i-1) * Tb::Cmd.opt_consecutive_n + i + 1).to_s] = v
79
+ else
80
+ pairs2["#{f}_#{i+1}"] = v
81
+ end
78
82
  }
79
83
  }
80
84
  empty = false
data/lib/tb/cmd_crop.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2011-2014 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions
@@ -35,10 +35,26 @@ def (Tb::Cmd).op_crop
35
35
  op.banner = "Usage: tb crop [OPTS] [TABLE ...]\n" +
36
36
  "Extract rectangle in a table."
37
37
  define_common_option(op, "ho", "--no-pager")
38
- op.def_option('-r RANGE', 'range. i.e. "2,1-4,3", "B1:D3"') {|arg| Tb::Cmd.opt_crop_range = arg }
38
+ op.def_option('-r RANGE', 'range. i.e. "R2C1:R4C3", "B1:D3"') {|arg| Tb::Cmd.opt_crop_range = arg }
39
39
  op
40
40
  end
41
41
 
42
+ Tb::Cmd.def_vhelp('crop', <<'End')
43
+ Example:
44
+
45
+ % cat tst.csv
46
+ 0,1,2,4
47
+ 5,6,7,8
48
+ 9,a,b,c
49
+ d,e,f,g
50
+ h,i,j,k
51
+ % tb crop -r R2C2:R4C3 tst.csv
52
+ 6,7
53
+ a,b
54
+ e,f
55
+ End
56
+
57
+
42
58
  def (Tb::Cmd).decode_a1_addressing_col(str)
43
59
  (26**str.length-1)/25+str.tr("A-Z", "0-9A-P").to_i(26)
44
60
  end
@@ -79,7 +95,10 @@ def (Tb::Cmd).main_crop(argv)
79
95
  f = f.to_i
80
96
  f < range_col1 || range_col2 < f
81
97
  }
82
- y.yield pairs2
98
+ pairs2 = pairs2.map {|f, v|
99
+ [(f.to_i - range_col1 + 1).to_s, v]
100
+ }
101
+ y.yield Hash[pairs2]
83
102
  end
84
103
  rownum += 1
85
104
  }
data/lib/tb/cmd_cross.rb CHANGED
@@ -42,6 +42,30 @@ def (Tb::Cmd).op_cross
42
42
  op
43
43
  end
44
44
 
45
+ Tb::Cmd.def_vhelp('cross', <<'End')
46
+ Example:
47
+
48
+ % cat tst.csv
49
+ a,b,c
50
+ A,X,2
51
+ A,Y,3
52
+ B,Y,4
53
+ % tb cross a b tst.csv
54
+ b,X,Y
55
+ a,count,count
56
+ A,1,1
57
+ B,,1
58
+ % tb cross -c a b tst.csv
59
+ a,X,Y
60
+ A,1,1
61
+ B,,1
62
+ % tb cross a b -a 'avg(c)' tst.csv
63
+ b,X,Y
64
+ a,avg(c),avg(c)
65
+ A,2.0,3.0
66
+ B,,4.0
67
+ End
68
+
45
69
  def (Tb::Cmd).main_cross(argv)
46
70
  op_cross.parse!(argv)
47
71
  exit_if_help('cross')
data/lib/tb/cmd_cut.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2011-2014 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions
@@ -39,6 +39,23 @@ def (Tb::Cmd).op_cut
39
39
  op
40
40
  end
41
41
 
42
+ Tb::Cmd.def_vhelp('cut', <<'End')
43
+ Example:
44
+
45
+ % cat tst.csv
46
+ a,b,c
47
+ 0,1,2
48
+ 4,5,6
49
+ % tb cut a,c tst.csv
50
+ a,c
51
+ 0,2
52
+ 4,6
53
+ % tb cut -v a tst.csv
54
+ b,c
55
+ 1,2
56
+ 5,6
57
+ End
58
+
42
59
  def (Tb::Cmd).main_cut(argv)
43
60
  op_cut.parse!(argv)
44
61
  exit_if_help('cut')
@@ -59,15 +76,8 @@ def (Tb::Cmd).main_cut(argv)
59
76
  output_tbenum(er)
60
77
  else
61
78
  er = Tb::Enumerator.new {|y|
62
- tblreader.with_header {|header0|
63
- if header0
64
- fieldset = Tb::FieldSet.new(*header0)
65
- fs.each {|f|
66
- fieldset.index_from_field_ex(f)
67
- }
68
- end
69
- y.set_header fs
70
- }.each {|pairs|
79
+ y.set_header fs
80
+ tblreader.each {|pairs|
71
81
  y.yield pairs.reject {|k, v| !fs.include?(k) }
72
82
  }
73
83
  }
data/lib/tb/cmd_git.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2011-2012 Tanaka Akira <akr@fsij.org>
1
+ # Copyright (C) 2011-2014 Tanaka Akira <akr@fsij.org>
2
2
  #
3
3
  # Redistribution and use in source and binary forms, with or without
4
4
  # modification, are permitted provided that the following conditions
@@ -43,6 +43,20 @@ def (Tb::Cmd).op_git
43
43
  op
44
44
  end
45
45
 
46
+ Tb::Cmd.def_vhelp('git', <<'End')
47
+ Example:
48
+
49
+ % tb git | tb cut commit
50
+ commit
51
+ dedc77387459ea90b5f508be03cce9ecf0d990b5
52
+ bebf22d1d97921fb86f816ea48235948fa4c73b3
53
+ 38d61f96a8b33a6c5d527e564dc78b4a47b3eff0
54
+ 48c10d3b1b0609d389ce1d9203e57f63c1408703
55
+ d002c43e0a85ec7051f667140261d6b0e74e9039
56
+ 9292cade7c71d0b8805898621a53ba7a4713d029
57
+ ...
58
+ End
59
+
46
60
  Tb::Cmd::GIT_LOG_FORMAT_SPEC = [
47
61
  %w[commit %H],
48
62
  %w[tree %T],
@@ -150,12 +164,11 @@ def (Tb::Cmd).git_parse_commit(commit_info, files)
150
164
  warn "unexpected git output (raw/numstat): #{file_line.inspect}"
151
165
  end
152
166
  }
153
- Tb.csv_stream_output(files_csv="") {|gen|
154
- gen << %w[mode1 mode2 hash1 hash2 add del status filename]
155
- files_raw.each {|filename, (mode1, mode2, hash1, hash2, status)|
156
- add, del = files_numstat[filename]
157
- gen << [mode1, mode2, hash1, hash2, add, del, status, filename]
158
- }
167
+ files_csv = ""
168
+ files_csv << %w[mode1 mode2 hash1 hash2 add del status filename].to_csv
169
+ files_raw.each {|filename, (mode1, mode2, hash1, hash2, status)|
170
+ add, del = files_numstat[filename]
171
+ files_csv << [mode1, mode2, hash1, hash2, add, del, status, filename].to_csv
159
172
  }
160
173
  h = {}
161
174
  commit_info.each {|s|
data/lib/tb/cmd_group.rb CHANGED
@@ -41,6 +41,38 @@ def (Tb::Cmd).op_group
41
41
  op
42
42
  end
43
43
 
44
+ Tb::Cmd.def_vhelp('group', <<'End')
45
+ Example:
46
+
47
+ % cat tst.csv
48
+ a,b,c
49
+ A,X,2
50
+ A,Y,3
51
+ B,Y,4
52
+ % tb group a tst.csv
53
+ a
54
+ A
55
+ B
56
+ % tb group a -a count tst.csv
57
+ a,count
58
+ A,2
59
+ B,1
60
+ % tb group a -a 'avg(c)' tst.csv
61
+ a,avg(c)
62
+ A,2.5
63
+ B,4.0
64
+ % tb group a,b tst.csv
65
+ a,b
66
+ A,X
67
+ A,Y
68
+ B,Y
69
+ % tb group a,b -a count tst.csv
70
+ a,b,count
71
+ A,X,1
72
+ A,Y,1
73
+ B,Y,1
74
+ End
75
+
44
76
  def (Tb::Cmd).main_group(argv)
45
77
  op_group.parse!(argv)
46
78
  exit_if_help('group')
data/lib/tb/cmd_gsub.rb CHANGED
@@ -41,6 +41,27 @@ def (Tb::Cmd).op_gsub
41
41
  op
42
42
  end
43
43
 
44
+ Tb::Cmd.def_vhelp('gsub', <<'End')
45
+ Example:
46
+
47
+ % cat tst.csv
48
+ foo,bar
49
+ baz,qux
50
+ hoge,moga
51
+ % tb gsub o X tst.csv
52
+ foo,bar
53
+ baz,qux
54
+ hXge,mXga
55
+ % tb gsub -f foo o X tst.csv
56
+ foo,bar
57
+ baz,qux
58
+ hXge,moga
59
+ % tb gsub '[aeiou]' '{\&}' tst.csv
60
+ foo,bar
61
+ b{a}z,q{u}x
62
+ h{o}g{e},m{o}g{a}
63
+ End
64
+
44
65
  def (Tb::Cmd).main_gsub(argv)
45
66
  op_gsub.parse!(argv)
46
67
  exit_if_help('gsub')
data/lib/tb/cmd_join.rb CHANGED
@@ -59,6 +59,34 @@ def (Tb::Cmd).op_join
59
59
  op
60
60
  end
61
61
 
62
+ Tb::Cmd.def_vhelp('join', <<'End')
63
+ Example:
64
+
65
+ % cat tst1.csv
66
+ name,length
67
+ A,20
68
+ B,30
69
+ C,25
70
+ % cat tst2.csv
71
+ name,weight
72
+ A,5
73
+ B,8
74
+ % tb join tst1.csv tst2.csv
75
+ name,length,weight
76
+ A,20,5
77
+ B,30,8
78
+ % tb join --left tst1.csv tst2.csv
79
+ name,length,weight
80
+ A,20,5
81
+ B,30,8
82
+ C,25,
83
+ % tb join --left --outer-missing=zzz tst1.csv tst2.csv
84
+ name,length,weight
85
+ A,20,5
86
+ B,30,8
87
+ C,25,zzz
88
+ End
89
+
62
90
  def (Tb::Cmd).main_join(argv)
63
91
  op_join.parse!(argv)
64
92
  exit_if_help('join')
data/lib/tb/cmd_ls.rb CHANGED
@@ -45,6 +45,15 @@ def (Tb::Cmd).op_ls
45
45
  op
46
46
  end
47
47
 
48
+ Tb::Cmd.def_vhelp('ls', <<'End')
49
+ Example:
50
+
51
+ % tb ls -l lib
52
+ filemode,nlink,user,group,size,mtime,filename,symlink
53
+ drwxr-xr-x,2,akr,akr,4096,2014-10-26T09:15:35+09:00,lib/tb,
54
+ -rw-r--r--,1,akr,akr,2113,2014-10-25T18:40:05+09:00,lib/tb.rb,
55
+ End
56
+
48
57
  def (Tb::Cmd).main_ls(argv)
49
58
  op_ls.parse!(argv)
50
59
  exit_if_help('ls')