td 0.10.65 → 0.10.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,401 @@
1
+ require 'spec_helper'
2
+ require 'file_reader/shared_context'
3
+
4
+ require 'stringio'
5
+ require 'td/file_reader'
6
+
7
+ include TreasureData
8
+
9
+ describe FileReader do
10
+ include_context 'error_proc'
11
+
12
+ describe 'initialize' do
13
+ subject { FileReader.new }
14
+
15
+ its(:parser_class) { should be_nil }
16
+ its(:opts) { should be_empty }
17
+ [:delimiter_expr, :null_expr, :true_expr, :false_expr].each { |key|
18
+ its(:default_opts) { should have_key(key); }
19
+ }
20
+ end
21
+
22
+ let :reader do
23
+ FileReader.new
24
+ end
25
+
26
+ describe 'set_format_template' do
27
+ it 'can set csv' do
28
+ reader.set_format_template('csv')
29
+ reader.instance_variable_get(:@format).should == 'text'
30
+ reader.opts.should include(:delimiter_expr => /,/)
31
+ end
32
+
33
+ it 'can set tsv' do
34
+ reader.set_format_template('tsv')
35
+ reader.instance_variable_get(:@format).should == 'text'
36
+ reader.opts.should include(:delimiter_expr => /\t/)
37
+ end
38
+
39
+ it 'can set apache' do
40
+ reader.set_format_template('apache')
41
+ reader.instance_variable_get(:@format).should == 'apache'
42
+ reader.opts.should include(:time_column => 'time')
43
+ end
44
+
45
+ it 'can set syslog' do
46
+ reader.set_format_template('syslog')
47
+ reader.instance_variable_get(:@format).should == 'syslog'
48
+ reader.opts.should include(:time_column => 'time')
49
+ end
50
+
51
+ it 'can set msgpack' do
52
+ reader.set_format_template('msgpack')
53
+ reader.instance_variable_get(:@format).should == 'msgpack'
54
+ end
55
+
56
+ it 'can set json' do
57
+ reader.set_format_template('json')
58
+ reader.instance_variable_get(:@format).should == 'json'
59
+ end
60
+
61
+ it 'raises when set unknown format' do
62
+ expect {
63
+ reader.set_format_template('oreore')
64
+ }.to raise_error(Exception, /Unknown format: oreore/)
65
+ end
66
+ end
67
+
68
+ describe 'init_optparse' do
69
+ def parse_opt(argv, &block)
70
+ op = OptionParser.new
71
+ reader.init_optparse(op)
72
+ op.parse!(argv)
73
+ block.call
74
+ end
75
+
76
+ context '-f option' do
77
+ ['-f', '--format'].each { |opt|
78
+ ['csv', 'tsv', 'apache', 'syslog', 'msgpack', 'json'].each { |format|
79
+ it "#{opt} option with #{format}" do
80
+ reader.should_receive(:set_format_template).with(format)
81
+ parse_opt([opt, format]) { }
82
+ end
83
+ }
84
+ }
85
+ end
86
+
87
+ context 'columns names option' do
88
+ ['-h', '--columns'].each { |opt|
89
+ it "#{opt} option" do
90
+ columns = 'A,B,C'
91
+ parse_opt([opt, columns]) {
92
+ reader.opts.should include(:column_names => columns.split(','))
93
+ }
94
+ end
95
+ }
96
+ end
97
+
98
+ context 'columns header option' do
99
+ ['-H', '--column-header'].each { |opt|
100
+ it "#{opt} option" do
101
+ parse_opt([opt]) {
102
+ reader.opts.should include(:column_header => true)
103
+ }
104
+ end
105
+ }
106
+ end
107
+
108
+ context 'delimiter between column option' do
109
+ ['-d', '--delimiter'].each { |opt|
110
+ it "#{opt} option" do
111
+ pattern = '!'
112
+ parse_opt([opt, pattern]) {
113
+ reader.opts.should include(:delimiter_expr => Regexp.new(pattern))
114
+ }
115
+ end
116
+ }
117
+ end
118
+
119
+ context 'null expression option' do
120
+ it "--null REGEX option" do
121
+ pattern = 'null'
122
+ parse_opt(['--null', pattern]) {
123
+ reader.opts.should include(:null_expr => Regexp.new(pattern))
124
+ }
125
+ end
126
+ end
127
+
128
+ context 'true expression option' do
129
+ it "--true REGEX option" do
130
+ pattern = 'true'
131
+ parse_opt(['--true', pattern]) {
132
+ reader.opts.should include(:true_expr => Regexp.new(pattern))
133
+ }
134
+ end
135
+ end
136
+
137
+ context 'false expression option' do
138
+ it "--false REGEX option" do
139
+ pattern = 'false'
140
+ parse_opt(['--false', pattern]) {
141
+ reader.opts.should include(:false_expr => Regexp.new(pattern))
142
+ }
143
+ end
144
+ end
145
+
146
+ context 'disable automatic type conversion option' do
147
+ ['-S', '--all-string'].each { |opt|
148
+ it "#{opt} option" do
149
+ parse_opt([opt]) {
150
+ reader.opts.should include(:all_string => true)
151
+ }
152
+ end
153
+ }
154
+ end
155
+
156
+ context 'name of the time column option' do
157
+ ['-t', '--time-column'].each { |opt|
158
+ it "#{opt} option" do
159
+ name = 'created_at'
160
+ parse_opt([opt, name]) {
161
+ reader.opts.should include(:time_column => name)
162
+ }
163
+ end
164
+ }
165
+ end
166
+
167
+ context 'strftime(3) format of the time column option' do
168
+ ['-T', '--time-format'].each { |opt|
169
+ it "#{opt} option" do
170
+ format = '%Y'
171
+ parse_opt([opt, format]) {
172
+ reader.opts.should include(:time_format => format)
173
+ }
174
+ end
175
+ }
176
+ end
177
+
178
+ context 'value of the time column option' do
179
+ {'int' => lambda { |t| t.to_i.to_s }, 'formatted' => lambda { |t| t.to_s }}.each_pair { |value_type, converter|
180
+ it "--time-value option with #{value_type}" do
181
+ time = Time.now
182
+ parse_opt(['--time-value', converter.call(time)]) {
183
+ reader.opts.should include(:time_value => time.to_i)
184
+ }
185
+ end
186
+ }
187
+ end
188
+
189
+ context 'text encoding option' do
190
+ ['-e', '--encoding'].each { |opt|
191
+ it "#{opt} option" do
192
+ enc = 'utf-8'
193
+ parse_opt([opt, enc]) {
194
+ reader.opts.should include(:encoding => enc)
195
+ }
196
+ end
197
+ }
198
+ end
199
+
200
+ context 'compression format option' do
201
+ ['-C', '--compress'].each { |opt|
202
+ it "#{opt} option" do
203
+ format = 'gzip'
204
+ parse_opt([opt, format]) {
205
+ reader.opts.should include(:compress => format)
206
+ }
207
+ end
208
+ }
209
+ end
210
+ end
211
+
212
+ describe 'compose_factory' do
213
+ it 'returns Proc object' do
214
+ factory = reader.compose_factory
215
+ factory.should be_an_instance_of(Proc)
216
+ end
217
+
218
+ # other specs in parse spec
219
+ end
220
+
221
+ describe 'parse' do
222
+ let :dataset_header do
223
+ ['name', 'num', 'created_at', 'flag']
224
+ end
225
+
226
+ let :dataset_values do
227
+ [
228
+ ['k', 12345, Time.now.to_s, true],
229
+ ['s', 34567, Time.now.to_s, false],
230
+ ['n', 56789, Time.now.to_s, true],
231
+ ]
232
+ end
233
+
234
+ let :dataset do
235
+ dataset_values.map { |data|
236
+ Hash[dataset_header.zip(data)]
237
+ }
238
+ end
239
+
240
+ let :time_column do
241
+ 'created_at'
242
+ end
243
+
244
+ def parse_opt(argv, &block)
245
+ op = OptionParser.new
246
+ reader.init_optparse(op)
247
+ op.parse!(argv)
248
+ block.call
249
+ end
250
+
251
+ shared_examples_for 'parse --time-value / --time-column cases' do |format, args|
252
+ it "parse #{format} with --time-value" do
253
+ @time = Time.now.to_i
254
+ parse_opt(%W(-f #{format} --time-value #{@time}) + (args || [])) {
255
+ i = 0
256
+ reader.parse(io, error) { |record|
257
+ record.should == dataset[i].merge('time' => @time)
258
+ i += 1
259
+ }
260
+ }
261
+ end
262
+
263
+ it "parse #{format} with --time-column" do
264
+ parse_opt(%W(-f #{format} --time-column #{time_column}) + (args || [])) {
265
+ i = 0
266
+ reader.parse(io, error) { |record|
267
+ time = record[time_column]
268
+ time = Time.parse(time).to_i if time.is_a?(String)
269
+ record.should == dataset[i].merge('time' => time)
270
+ i += 1
271
+ }
272
+ }
273
+ end
274
+ end
275
+
276
+ shared_examples_for 'parse --columns / --column-header cases' do |format|
277
+ converter = "to_#{format}".to_sym
278
+
279
+ context 'array format' do
280
+ let :lines do
281
+ dataset_values.map { |data| data.__send__(converter) }
282
+ end
283
+
284
+ context 'with --column-columns' do
285
+ it_should_behave_like 'parse --time-value / --time-column cases', format, %W(-h name,num,created_at,flag)
286
+ end
287
+
288
+ context 'with --column-header' do
289
+ let :lines do
290
+ [dataset_header.__send__(converter)] + dataset_values.map { |data| data.__send__(converter) }
291
+ end
292
+
293
+ it_should_behave_like 'parse --time-value / --time-column cases', format, %W(-H)
294
+ end
295
+ end
296
+ end
297
+
298
+ let :io do
299
+ StringIO.new(lines.join("\n"))
300
+ end
301
+
302
+ context 'json' do
303
+ require 'json'
304
+
305
+ let :lines do
306
+ dataset.map(&:to_json)
307
+ end
308
+
309
+ it_should_behave_like 'parse --time-value / --time-column cases', 'json'
310
+ it_should_behave_like 'parse --columns / --column-header cases', 'json'
311
+ end
312
+
313
+ context 'msgpack' do
314
+ require 'msgpack'
315
+
316
+ let :lines do
317
+ dataset.map(&:to_msgpack)
318
+ end
319
+
320
+ let :io do
321
+ StringIO.new(lines.join(""))
322
+ end
323
+
324
+ it_should_behave_like 'parse --time-value / --time-column cases', 'msgpack'
325
+ it_should_behave_like 'parse --columns / --column-header cases', 'msgpack'
326
+ end
327
+
328
+ [['csv', ','], ['tsv', "\t"]].each { |text_type, pattern|
329
+ context 'text' do
330
+ let :lines do
331
+ dataset_values.map { |data| data.map(&:to_s).join(pattern) }
332
+ end
333
+
334
+ it "raises an exception without --column-header or --columns in #{pattern}" do
335
+ parse_opt(%W(-f #{text_type})) {
336
+ expect {
337
+ reader.parse(io, error)
338
+ }.to raise_error(Exception, /--column-header or --columns option is required/)
339
+ }
340
+ end
341
+
342
+ context 'with --column-columns' do
343
+ it_should_behave_like 'parse --time-value / --time-column cases', text_type, %W(-h name,num,created_at,flag)
344
+ end
345
+
346
+ context 'with --column-header' do
347
+ let :lines do
348
+ [dataset_header.join(pattern)] + dataset_values.map { |data| data.map(&:to_s).join(pattern) }
349
+ end
350
+
351
+ it_should_behave_like 'parse --time-value / --time-column cases', text_type, %W(-H)
352
+ end
353
+
354
+ # TODO: Add all_string
355
+ end
356
+ }
357
+
358
+ {
359
+ 'apache' => [
360
+ [
361
+ '58.83.188.60 - - [23/Oct/2011:08:15:46 -0700] "HEAD / HTTP/1.0" 200 277 "-" "-"',
362
+ '127.0.0.1 - - [23/Oct/2011:08:20:01 -0700] "GET / HTTP/1.0" 200 492 "-" "Wget/1.12 (linux-gnu)"',
363
+ '68.64.37.100 - - [24/Oct/2011:01:48:54 -0700] "GET /phpMyAdmin/scripts/setup.php HTTP/1.1" 404 480 "-" "ZmEu"'
364
+ ],
365
+ [
366
+ {"host" => "58.83.188.60", "user" => nil, "time" => 1319382946, "method" => "HEAD", "path" => "/", "code" => 200, "size" => 277, "referer" => nil, "agent" => nil},
367
+ {"host" => "127.0.0.1", "user" => nil, "time" => 1319383201, "method" => "GET", "path" => "/", "code" => 200, "size" => 492, "referer" => nil, "agent" => "Wget/1.12 (linux-gnu)"},
368
+ {"host" => "68.64.37.100", "user" => nil, "time" => 1319446134, "method" => "GET", "path" => "/phpMyAdmin/scripts/setup.php", "code" => 404, "size" => 480, "referer" => nil, "agent" => "ZmEu"},
369
+ ]
370
+ ],
371
+ 'syslog' => [
372
+ [
373
+ 'Dec 20 12:41:44 localhost kernel: [4843680.692840] e1000e: eth2 NIC Link is Down',
374
+ 'Dec 20 12:41:44 localhost kernel: [4843680.734466] br0: port 1(eth2) entering disabled state',
375
+ 'Dec 22 10:42:41 localhost kernel[10000]: [5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]',
376
+ ],
377
+ [
378
+ {"pid" => nil, "time" => 1355974904, "host" => "localhost", "ident" => "kernel", "message" => "[4843680.692840] e1000e: eth2 NIC Link is Down"},
379
+ {"pid" => nil, "time" => 1355974904, "host" => "localhost", "ident" => "kernel", "message" => "[4843680.734466] br0: port 1(eth2) entering disabled state"},
380
+ {"pid" => 10000, "time" => 1356140561, "host" => "localhost", "ident" => "kernel", "message" => "[5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]"},
381
+ ]
382
+ ]
383
+ }.each_pair { |format, (input, output)|
384
+ context format do
385
+ let :lines do
386
+ input
387
+ end
388
+
389
+ let :dataset do
390
+ output
391
+ end
392
+
393
+ let :time_column do
394
+ 'time'
395
+ end
396
+
397
+ it_should_behave_like 'parse --time-value / --time-column cases', format
398
+ end
399
+ }
400
+ end
401
+ end
@@ -0,0 +1,16 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+
4
+ require 'rspec'
5
+ require 'json'
6
+
7
+ if ENV['SIMPLE_COV']
8
+ # SimpleCov
9
+ # https://github.com/colszowka/simplecov
10
+ require 'simplecov'
11
+ SimpleCov.start do
12
+ add_filter 'spec/'
13
+ add_filter 'pkg/'
14
+ add_filter 'vendor/'
15
+ end
16
+ end
data/td.gemspec CHANGED
@@ -20,7 +20,9 @@ Gem::Specification.new do |gem|
20
20
  gem.add_dependency "yajl-ruby", "~> 1.1.0"
21
21
  gem.add_dependency "hirb", ">= 0.4.5"
22
22
  gem.add_dependency "parallel", "~> 0.5.19"
23
- gem.add_dependency "td-client", "~> 0.8.40"
23
+ gem.add_dependency "td-client", "~> 0.8.42"
24
24
  gem.add_dependency "td-logger", "~> 0.3.16"
25
25
  gem.add_development_dependency "rake", "~> 0.9"
26
+ gem.add_development_dependency "rspec", "~> 2.10.0"
27
+ gem.add_development_dependency "simplecov", "~> 0.5.4"
26
28
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.65
4
+ version: 0.10.66
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-27 00:00:00.000000000 Z
12
+ date: 2013-01-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: msgpack
@@ -82,7 +82,7 @@ dependencies:
82
82
  requirements:
83
83
  - - ~>
84
84
  - !ruby/object:Gem::Version
85
- version: 0.8.40
85
+ version: 0.8.42
86
86
  type: :runtime
87
87
  prerelease: false
88
88
  version_requirements: !ruby/object:Gem::Requirement
@@ -90,7 +90,7 @@ dependencies:
90
90
  requirements:
91
91
  - - ~>
92
92
  - !ruby/object:Gem::Version
93
- version: 0.8.40
93
+ version: 0.8.42
94
94
  - !ruby/object:Gem::Dependency
95
95
  name: td-logger
96
96
  requirement: !ruby/object:Gem::Requirement
@@ -123,6 +123,38 @@ dependencies:
123
123
  - - ~>
124
124
  - !ruby/object:Gem::Version
125
125
  version: '0.9'
126
+ - !ruby/object:Gem::Dependency
127
+ name: rspec
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: 2.10.0
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ version: 2.10.0
142
+ - !ruby/object:Gem::Dependency
143
+ name: simplecov
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ~>
148
+ - !ruby/object:Gem::Version
149
+ version: 0.5.4
150
+ type: :development
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ~>
156
+ - !ruby/object:Gem::Version
157
+ version: 0.5.4
126
158
  description: CLI to manage data on Treasure Data, the Hadoop-based cloud data warehousing
127
159
  email: support@treasure-data.com
128
160
  executables:
@@ -179,6 +211,13 @@ files:
179
211
  - lib/td/distribution.rb
180
212
  - lib/td/file_reader.rb
181
213
  - lib/td/version.rb
214
+ - spec/file_reader/filter_spec.rb
215
+ - spec/file_reader/io_filter_spec.rb
216
+ - spec/file_reader/line_reader_spec.rb
217
+ - spec/file_reader/parsing_reader_spec.rb
218
+ - spec/file_reader/shared_context.rb
219
+ - spec/file_reader_spec.rb
220
+ - spec/spec_helper.rb
182
221
  - td.gemspec
183
222
  homepage: http://treasure-data.com/
184
223
  licenses: []
@@ -204,4 +243,12 @@ rubygems_version: 1.8.23
204
243
  signing_key:
205
244
  specification_version: 3
206
245
  summary: CLI to manage data on Treasure Data, the Hadoop-based cloud data warehousing
207
- test_files: []
246
+ test_files:
247
+ - spec/file_reader/filter_spec.rb
248
+ - spec/file_reader/io_filter_spec.rb
249
+ - spec/file_reader/line_reader_spec.rb
250
+ - spec/file_reader/parsing_reader_spec.rb
251
+ - spec/file_reader/shared_context.rb
252
+ - spec/file_reader_spec.rb
253
+ - spec/spec_helper.rb
254
+ has_rdoc: false