td 0.10.65 → 0.10.66

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,401 @@
1
+ require 'spec_helper'
2
+ require 'file_reader/shared_context'
3
+
4
+ require 'stringio'
5
+ require 'td/file_reader'
6
+
7
+ include TreasureData
8
+
9
+ describe FileReader do
10
+ include_context 'error_proc'
11
+
12
+ describe 'initialize' do
13
+ subject { FileReader.new }
14
+
15
+ its(:parser_class) { should be_nil }
16
+ its(:opts) { should be_empty }
17
+ [:delimiter_expr, :null_expr, :true_expr, :false_expr].each { |key|
18
+ its(:default_opts) { should have_key(key); }
19
+ }
20
+ end
21
+
22
+ let :reader do
23
+ FileReader.new
24
+ end
25
+
26
+ describe 'set_format_template' do
27
+ it 'can set csv' do
28
+ reader.set_format_template('csv')
29
+ reader.instance_variable_get(:@format).should == 'text'
30
+ reader.opts.should include(:delimiter_expr => /,/)
31
+ end
32
+
33
+ it 'can set tsv' do
34
+ reader.set_format_template('tsv')
35
+ reader.instance_variable_get(:@format).should == 'text'
36
+ reader.opts.should include(:delimiter_expr => /\t/)
37
+ end
38
+
39
+ it 'can set apache' do
40
+ reader.set_format_template('apache')
41
+ reader.instance_variable_get(:@format).should == 'apache'
42
+ reader.opts.should include(:time_column => 'time')
43
+ end
44
+
45
+ it 'can set syslog' do
46
+ reader.set_format_template('syslog')
47
+ reader.instance_variable_get(:@format).should == 'syslog'
48
+ reader.opts.should include(:time_column => 'time')
49
+ end
50
+
51
+ it 'can set msgpack' do
52
+ reader.set_format_template('msgpack')
53
+ reader.instance_variable_get(:@format).should == 'msgpack'
54
+ end
55
+
56
+ it 'can set json' do
57
+ reader.set_format_template('json')
58
+ reader.instance_variable_get(:@format).should == 'json'
59
+ end
60
+
61
+ it 'raises when set unknown format' do
62
+ expect {
63
+ reader.set_format_template('oreore')
64
+ }.to raise_error(Exception, /Unknown format: oreore/)
65
+ end
66
+ end
67
+
68
+ describe 'init_optparse' do
69
+ def parse_opt(argv, &block)
70
+ op = OptionParser.new
71
+ reader.init_optparse(op)
72
+ op.parse!(argv)
73
+ block.call
74
+ end
75
+
76
+ context '-f option' do
77
+ ['-f', '--format'].each { |opt|
78
+ ['csv', 'tsv', 'apache', 'syslog', 'msgpack', 'json'].each { |format|
79
+ it "#{opt} option with #{format}" do
80
+ reader.should_receive(:set_format_template).with(format)
81
+ parse_opt([opt, format]) { }
82
+ end
83
+ }
84
+ }
85
+ end
86
+
87
+ context 'columns names option' do
88
+ ['-h', '--columns'].each { |opt|
89
+ it "#{opt} option" do
90
+ columns = 'A,B,C'
91
+ parse_opt([opt, columns]) {
92
+ reader.opts.should include(:column_names => columns.split(','))
93
+ }
94
+ end
95
+ }
96
+ end
97
+
98
+ context 'columns header option' do
99
+ ['-H', '--column-header'].each { |opt|
100
+ it "#{opt} option" do
101
+ parse_opt([opt]) {
102
+ reader.opts.should include(:column_header => true)
103
+ }
104
+ end
105
+ }
106
+ end
107
+
108
+ context 'delimiter between column option' do
109
+ ['-d', '--delimiter'].each { |opt|
110
+ it "#{opt} option" do
111
+ pattern = '!'
112
+ parse_opt([opt, pattern]) {
113
+ reader.opts.should include(:delimiter_expr => Regexp.new(pattern))
114
+ }
115
+ end
116
+ }
117
+ end
118
+
119
+ context 'null expression option' do
120
+ it "--null REGEX option" do
121
+ pattern = 'null'
122
+ parse_opt(['--null', pattern]) {
123
+ reader.opts.should include(:null_expr => Regexp.new(pattern))
124
+ }
125
+ end
126
+ end
127
+
128
+ context 'true expression option' do
129
+ it "--true REGEX option" do
130
+ pattern = 'true'
131
+ parse_opt(['--true', pattern]) {
132
+ reader.opts.should include(:true_expr => Regexp.new(pattern))
133
+ }
134
+ end
135
+ end
136
+
137
+ context 'false expression option' do
138
+ it "--false REGEX option" do
139
+ pattern = 'false'
140
+ parse_opt(['--false', pattern]) {
141
+ reader.opts.should include(:false_expr => Regexp.new(pattern))
142
+ }
143
+ end
144
+ end
145
+
146
+ context 'disable automatic type conversion option' do
147
+ ['-S', '--all-string'].each { |opt|
148
+ it "#{opt} option" do
149
+ parse_opt([opt]) {
150
+ reader.opts.should include(:all_string => true)
151
+ }
152
+ end
153
+ }
154
+ end
155
+
156
+ context 'name of the time column option' do
157
+ ['-t', '--time-column'].each { |opt|
158
+ it "#{opt} option" do
159
+ name = 'created_at'
160
+ parse_opt([opt, name]) {
161
+ reader.opts.should include(:time_column => name)
162
+ }
163
+ end
164
+ }
165
+ end
166
+
167
+ context 'strftime(3) format of the time column option' do
168
+ ['-T', '--time-format'].each { |opt|
169
+ it "#{opt} option" do
170
+ format = '%Y'
171
+ parse_opt([opt, format]) {
172
+ reader.opts.should include(:time_format => format)
173
+ }
174
+ end
175
+ }
176
+ end
177
+
178
+ context 'value of the time column option' do
179
+ {'int' => lambda { |t| t.to_i.to_s }, 'formatted' => lambda { |t| t.to_s }}.each_pair { |value_type, converter|
180
+ it "--time-value option with #{value_type}" do
181
+ time = Time.now
182
+ parse_opt(['--time-value', converter.call(time)]) {
183
+ reader.opts.should include(:time_value => time.to_i)
184
+ }
185
+ end
186
+ }
187
+ end
188
+
189
+ context 'text encoding option' do
190
+ ['-e', '--encoding'].each { |opt|
191
+ it "#{opt} option" do
192
+ enc = 'utf-8'
193
+ parse_opt([opt, enc]) {
194
+ reader.opts.should include(:encoding => enc)
195
+ }
196
+ end
197
+ }
198
+ end
199
+
200
+ context 'compression format option' do
201
+ ['-C', '--compress'].each { |opt|
202
+ it "#{opt} option" do
203
+ format = 'gzip'
204
+ parse_opt([opt, format]) {
205
+ reader.opts.should include(:compress => format)
206
+ }
207
+ end
208
+ }
209
+ end
210
+ end
211
+
212
+ describe 'compose_factory' do
213
+ it 'returns Proc object' do
214
+ factory = reader.compose_factory
215
+ factory.should be_an_instance_of(Proc)
216
+ end
217
+
218
+ # Further compose_factory coverage lives in the 'parse' specs below.
219
+ end
220
+
221
+ describe 'parse' do
222
+ let :dataset_header do
223
+ ['name', 'num', 'created_at', 'flag']
224
+ end
225
+
226
+ let :dataset_values do
227
+ [
228
+ ['k', 12345, Time.now.to_s, true],
229
+ ['s', 34567, Time.now.to_s, false],
230
+ ['n', 56789, Time.now.to_s, true],
231
+ ]
232
+ end
233
+
234
+ let :dataset do
235
+ dataset_values.map { |data|
236
+ Hash[dataset_header.zip(data)]
237
+ }
238
+ end
239
+
240
+ let :time_column do
241
+ 'created_at'
242
+ end
243
+
244
+ def parse_opt(argv, &block)
245
+ op = OptionParser.new
246
+ reader.init_optparse(op)
247
+ op.parse!(argv)
248
+ block.call
249
+ end
250
+
251
+ shared_examples_for 'parse --time-value / --time-column cases' do |format, args|
252
+ it "parse #{format} with --time-value" do
253
+ @time = Time.now.to_i
254
+ parse_opt(%W(-f #{format} --time-value #{@time}) + (args || [])) {
255
+ i = 0
256
+ reader.parse(io, error) { |record|
257
+ record.should == dataset[i].merge('time' => @time)
258
+ i += 1
259
+ }
260
+ }
261
+ end
262
+
263
+ it "parse #{format} with --time-column" do
264
+ parse_opt(%W(-f #{format} --time-column #{time_column}) + (args || [])) {
265
+ i = 0
266
+ reader.parse(io, error) { |record|
267
+ time = record[time_column]
268
+ time = Time.parse(time).to_i if time.is_a?(String)
269
+ record.should == dataset[i].merge('time' => time)
270
+ i += 1
271
+ }
272
+ }
273
+ end
274
+ end
275
+
276
+ shared_examples_for 'parse --columns / --column-header cases' do |format|
277
+ converter = "to_#{format}".to_sym
278
+
279
+ context 'array format' do
280
+ let :lines do
281
+ dataset_values.map { |data| data.__send__(converter) }
282
+ end
283
+
284
+ context 'with --column-columns' do
285
+ it_should_behave_like 'parse --time-value / --time-column cases', format, %W(-h name,num,created_at,flag)
286
+ end
287
+
288
+ context 'with --column-header' do
289
+ let :lines do
290
+ [dataset_header.__send__(converter)] + dataset_values.map { |data| data.__send__(converter) }
291
+ end
292
+
293
+ it_should_behave_like 'parse --time-value / --time-column cases', format, %W(-H)
294
+ end
295
+ end
296
+ end
297
+
298
+ let :io do
299
+ StringIO.new(lines.join("\n"))
300
+ end
301
+
302
+ context 'json' do
303
+ require 'json'
304
+
305
+ let :lines do
306
+ dataset.map(&:to_json)
307
+ end
308
+
309
+ it_should_behave_like 'parse --time-value / --time-column cases', 'json'
310
+ it_should_behave_like 'parse --columns / --column-header cases', 'json'
311
+ end
312
+
313
+ context 'msgpack' do
314
+ require 'msgpack'
315
+
316
+ let :lines do
317
+ dataset.map(&:to_msgpack)
318
+ end
319
+
320
+ let :io do
321
+ StringIO.new(lines.join(""))
322
+ end
323
+
324
+ it_should_behave_like 'parse --time-value / --time-column cases', 'msgpack'
325
+ it_should_behave_like 'parse --columns / --column-header cases', 'msgpack'
326
+ end
327
+
328
+ [['csv', ','], ['tsv', "\t"]].each { |text_type, pattern|
329
+ context 'text' do
330
+ let :lines do
331
+ dataset_values.map { |data| data.map(&:to_s).join(pattern) }
332
+ end
333
+
334
+ it "raises an exception without --column-header or --columns in #{pattern}" do
335
+ parse_opt(%W(-f #{text_type})) {
336
+ expect {
337
+ reader.parse(io, error)
338
+ }.to raise_error(Exception, /--column-header or --columns option is required/)
339
+ }
340
+ end
341
+
342
+ context 'with --column-columns' do
343
+ it_should_behave_like 'parse --time-value / --time-column cases', text_type, %W(-h name,num,created_at,flag)
344
+ end
345
+
346
+ context 'with --column-header' do
347
+ let :lines do
348
+ [dataset_header.join(pattern)] + dataset_values.map { |data| data.map(&:to_s).join(pattern) }
349
+ end
350
+
351
+ it_should_behave_like 'parse --time-value / --time-column cases', text_type, %W(-H)
352
+ end
353
+
354
+ # TODO: Add specs for the -S / --all-string option with text (csv/tsv) input.
355
+ end
356
+ }
357
+
358
+ {
359
+ 'apache' => [
360
+ [
361
+ '58.83.188.60 - - [23/Oct/2011:08:15:46 -0700] "HEAD / HTTP/1.0" 200 277 "-" "-"',
362
+ '127.0.0.1 - - [23/Oct/2011:08:20:01 -0700] "GET / HTTP/1.0" 200 492 "-" "Wget/1.12 (linux-gnu)"',
363
+ '68.64.37.100 - - [24/Oct/2011:01:48:54 -0700] "GET /phpMyAdmin/scripts/setup.php HTTP/1.1" 404 480 "-" "ZmEu"'
364
+ ],
365
+ [
366
+ {"host" => "58.83.188.60", "user" => nil, "time" => 1319382946, "method" => "HEAD", "path" => "/", "code" => 200, "size" => 277, "referer" => nil, "agent" => nil},
367
+ {"host" => "127.0.0.1", "user" => nil, "time" => 1319383201, "method" => "GET", "path" => "/", "code" => 200, "size" => 492, "referer" => nil, "agent" => "Wget/1.12 (linux-gnu)"},
368
+ {"host" => "68.64.37.100", "user" => nil, "time" => 1319446134, "method" => "GET", "path" => "/phpMyAdmin/scripts/setup.php", "code" => 404, "size" => 480, "referer" => nil, "agent" => "ZmEu"},
369
+ ]
370
+ ],
371
+ 'syslog' => [
372
+ [
373
+ 'Dec 20 12:41:44 localhost kernel: [4843680.692840] e1000e: eth2 NIC Link is Down',
374
+ 'Dec 20 12:41:44 localhost kernel: [4843680.734466] br0: port 1(eth2) entering disabled state',
375
+ 'Dec 22 10:42:41 localhost kernel[10000]: [5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]',
376
+ ],
377
+ [
378
+ {"pid" => nil, "time" => 1355974904, "host" => "localhost", "ident" => "kernel", "message" => "[4843680.692840] e1000e: eth2 NIC Link is Down"},
379
+ {"pid" => nil, "time" => 1355974904, "host" => "localhost", "ident" => "kernel", "message" => "[4843680.734466] br0: port 1(eth2) entering disabled state"},
380
+ {"pid" => 10000, "time" => 1356140561, "host" => "localhost", "ident" => "kernel", "message" => "[5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]"},
381
+ ]
382
+ ]
383
+ }.each_pair { |format, (input, output)|
384
+ context format do
385
+ let :lines do
386
+ input
387
+ end
388
+
389
+ let :dataset do
390
+ output
391
+ end
392
+
393
+ let :time_column do
394
+ 'time'
395
+ end
396
+
397
+ it_should_behave_like 'parse --time-value / --time-column cases', format
398
+ end
399
+ }
400
+ end
401
+ end
@@ -0,0 +1,16 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+
4
+ require 'rspec'
5
+ require 'json'
6
+
7
+ if ENV['SIMPLE_COV']
8
+ # SimpleCov
9
+ # https://github.com/colszowka/simplecov
10
+ require 'simplecov'
11
+ SimpleCov.start do
12
+ add_filter 'spec/'
13
+ add_filter 'pkg/'
14
+ add_filter 'vendor/'
15
+ end
16
+ end
data/td.gemspec CHANGED
@@ -20,7 +20,9 @@ Gem::Specification.new do |gem|
20
20
  gem.add_dependency "yajl-ruby", "~> 1.1.0"
21
21
  gem.add_dependency "hirb", ">= 0.4.5"
22
22
  gem.add_dependency "parallel", "~> 0.5.19"
23
- gem.add_dependency "td-client", "~> 0.8.40"
23
+ gem.add_dependency "td-client", "~> 0.8.42"
24
24
  gem.add_dependency "td-logger", "~> 0.3.16"
25
25
  gem.add_development_dependency "rake", "~> 0.9"
26
+ gem.add_development_dependency "rspec", "~> 2.10.0"
27
+ gem.add_development_dependency "simplecov", "~> 0.5.4"
26
28
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.65
4
+ version: 0.10.66
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-27 00:00:00.000000000 Z
12
+ date: 2013-01-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: msgpack
@@ -82,7 +82,7 @@ dependencies:
82
82
  requirements:
83
83
  - - ~>
84
84
  - !ruby/object:Gem::Version
85
- version: 0.8.40
85
+ version: 0.8.42
86
86
  type: :runtime
87
87
  prerelease: false
88
88
  version_requirements: !ruby/object:Gem::Requirement
@@ -90,7 +90,7 @@ dependencies:
90
90
  requirements:
91
91
  - - ~>
92
92
  - !ruby/object:Gem::Version
93
- version: 0.8.40
93
+ version: 0.8.42
94
94
  - !ruby/object:Gem::Dependency
95
95
  name: td-logger
96
96
  requirement: !ruby/object:Gem::Requirement
@@ -123,6 +123,38 @@ dependencies:
123
123
  - - ~>
124
124
  - !ruby/object:Gem::Version
125
125
  version: '0.9'
126
+ - !ruby/object:Gem::Dependency
127
+ name: rspec
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: 2.10.0
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ version: 2.10.0
142
+ - !ruby/object:Gem::Dependency
143
+ name: simplecov
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ~>
148
+ - !ruby/object:Gem::Version
149
+ version: 0.5.4
150
+ type: :development
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ~>
156
+ - !ruby/object:Gem::Version
157
+ version: 0.5.4
126
158
  description: CLI to manage data on Treasure Data, the Hadoop-based cloud data warehousing
127
159
  email: support@treasure-data.com
128
160
  executables:
@@ -179,6 +211,13 @@ files:
179
211
  - lib/td/distribution.rb
180
212
  - lib/td/file_reader.rb
181
213
  - lib/td/version.rb
214
+ - spec/file_reader/filter_spec.rb
215
+ - spec/file_reader/io_filter_spec.rb
216
+ - spec/file_reader/line_reader_spec.rb
217
+ - spec/file_reader/parsing_reader_spec.rb
218
+ - spec/file_reader/shared_context.rb
219
+ - spec/file_reader_spec.rb
220
+ - spec/spec_helper.rb
182
221
  - td.gemspec
183
222
  homepage: http://treasure-data.com/
184
223
  licenses: []
@@ -204,4 +243,12 @@ rubygems_version: 1.8.23
204
243
  signing_key:
205
244
  specification_version: 3
206
245
  summary: CLI to manage data on Treasure Data, the Hadoop-based cloud data warehousing
207
- test_files: []
246
+ test_files:
247
+ - spec/file_reader/filter_spec.rb
248
+ - spec/file_reader/io_filter_spec.rb
249
+ - spec/file_reader/line_reader_spec.rb
250
+ - spec/file_reader/parsing_reader_spec.rb
251
+ - spec/file_reader/shared_context.rb
252
+ - spec/file_reader_spec.rb
253
+ - spec/spec_helper.rb
254
+ has_rdoc: false