td 0.10.65 → 0.10.66

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog CHANGED
@@ -1,3 +1,10 @@
1
+
2
+ == 2013-01-16 version 0.10.66
3
+
4
+ * td-client v0.8.42
5
+ * query, sched:create, sched:update subcommands support -R, --retry option
6
+
7
+
1
8
  == 2012-12-27 version 0.10.65
2
9
 
3
10
  * td-client v0.8.40
@@ -206,8 +206,8 @@ module Command
206
206
  record = {}
207
207
 
208
208
  cap = m.captures
209
- @names.each_with_index {|name,i|
210
- if value = cap[i]
209
+ @names.each_with_index {|name,cap_i|
210
+ if value = cap[cap_i]
211
211
  if name == "time"
212
212
  value = parse_time(value).to_i
213
213
  end
@@ -275,7 +275,7 @@ module Command
275
275
 
276
276
  class MessagePackParser
277
277
  def initialize(time_key)
278
- require 'json'
278
+ require 'msgpack'
279
279
  @time_key = time_key
280
280
  end
281
281
 
@@ -119,6 +119,7 @@ module Command
119
119
  puts "Status : #{job.status}"
120
120
  puts "Type : #{job.type}"
121
121
  puts "Priority : #{job_priority_name_of(job.priority)}"
122
+ puts "Retry limit : #{job.retry_limit}"
122
123
  puts "Result : #{job.result_url}"
123
124
  puts "Database : #{job.db_name}"
124
125
  puts "Query : #{job.query}"
@@ -12,6 +12,7 @@ module Command
12
12
  result_user = nil
13
13
  result_ask_password = false
14
14
  priority = nil
15
+ retry_limit = nil
15
16
 
16
17
  op.on('-d', '--database DB_NAME', 'use the database (required)') {|s|
17
18
  db_name = s
@@ -46,6 +47,9 @@ module Command
46
47
  raise "unknown priority #{s.inspect} should be -2 (very-low), -1 (low), 0 (normal), 1 (high) or 2 (very-high)"
47
48
  end
48
49
  }
50
+ op.on('-R', '--retry COUNT', 'automatic retrying count', Integer) {|i|
51
+ retry_limit = i
52
+ }
49
53
 
50
54
  sql = op.cmd_parse
51
55
 
@@ -68,7 +72,7 @@ module Command
68
72
  # local existence check
69
73
  get_database(client, db_name)
70
74
 
71
- job = client.query(db_name, sql, result_url, priority)
75
+ job = client.query(db_name, sql, result_url, priority, retry_limit)
72
76
 
73
77
  $stderr.puts "Job #{job.job_id} is queued."
74
78
  $stderr.puts "Use '#{$prog} job:show #{job.job_id}' to show the status."
@@ -32,6 +32,7 @@ module Command
32
32
  result_user = nil
33
33
  result_ask_password = false
34
34
  priority = nil
35
+ retry_limit = nil
35
36
 
36
37
  op.on('-d', '--database DB_NAME', 'use the database (required)') {|s|
37
38
  db_name = s
@@ -57,6 +58,9 @@ module Command
57
58
  raise "unknown priority #{s.inspect} should be -2 (very-low), -1 (low), 0 (normal), 1 (high) or 2 (very-high)"
58
59
  end
59
60
  }
61
+ op.on('-R', '--retry COUNT', 'automatic retrying count', Integer) {|i|
62
+ retry_limit = i
63
+ }
60
64
 
61
65
  name, cron, sql = op.cmd_parse
62
66
 
@@ -76,7 +80,7 @@ module Command
76
80
  get_database(client, db_name)
77
81
 
78
82
  begin
79
- first_time = client.create_schedule(name, :cron=>cron, :query=>sql, :database=>db_name, :result=>result_url, :timezone=>timezone, :delay=>delay, :priority=>priority)
83
+ first_time = client.create_schedule(name, :cron=>cron, :query=>sql, :database=>db_name, :result=>result_url, :timezone=>timezone, :delay=>delay, :priority=>priority, :retry_limit=>retry_limit)
80
84
  rescue AlreadyExistsError
81
85
  cmd_debug_error $!
82
86
  $stderr.puts "Schedule '#{name}' already exists."
@@ -111,6 +115,7 @@ module Command
111
115
  timezone = nil
112
116
  delay = nil
113
117
  priority = nil
118
+ retry_limit = nil
114
119
 
115
120
  op.on('-s', '--schedule CRON', 'change the schedule') {|s|
116
121
  cron = s
@@ -136,6 +141,10 @@ module Command
136
141
  raise "unknown priority #{s.inspect} should be -2 (very-low), -1 (low), 0 (normal), 1 (high) or 2 (very-high)"
137
142
  end
138
143
  }
144
+ op.on('-R', '--retry COUNT', 'automatic retrying count', Integer) {|i|
145
+ retry_limit = i
146
+ }
147
+
139
148
 
140
149
  name = op.cmd_parse
141
150
 
@@ -147,6 +156,7 @@ module Command
147
156
  params['timezone'] = timezone if timezone
148
157
  params['delay'] = delay.to_s if delay
149
158
  params['priority'] = priority.to_s if priority
159
+ params['retry_limit'] = retry_limit.to_s if retry_limit
150
160
 
151
161
  if params.empty?
152
162
  $stderr.puts op.to_s
@@ -209,6 +219,7 @@ module Command
209
219
  puts "Next : #{s.next_time}"
210
220
  puts "Result : #{s.result_url}"
211
221
  puts "Priority : #{job_priority_name_of(s.priority)}"
222
+ puts "Retry limit : #{s.retry_limit}"
212
223
  puts "Database : #{s.database}"
213
224
  puts "Query : #{s.query}"
214
225
  end
@@ -1,6 +1,16 @@
1
1
 
2
2
  module TreasureData
3
+ # json and msgpack formats support array types with columns
4
+ #
5
+ # - when --column-header option
6
+ # ["a", "b", "c"] # first line is header
7
+ # ["v", 10, true] # array types, e.g. generate {"a" => "v", "b" => 10, "c" => true}
8
+ # ...
9
+ # - when --columns a,b,c
10
+ # ["v", 10, true] # array types
11
+ # ...
3
12
  class FileReader
13
+ require 'time'
4
14
  require 'zlib'
5
15
 
6
16
  class DecompressIOFilter
@@ -67,20 +77,36 @@ module TreasureData
67
77
  end
68
78
  end
69
79
 
70
- # TODO
71
- #class QuotedDelimiterParsingReader
72
- # def initialize(io, error, opts)
73
- # require 'strscan'
74
- # @io = io
75
- # @error = error
76
- # @delimiter_expr = opts[:delimiter_expr]
77
- # @quote_char = opts[:quote_char]
78
- # @escape_char = opts[:escape_char]
79
- # end
80
-
81
- # def forward
82
- # end
83
- #end
80
+ # TODO: encoding handling
81
+ class SeparatedValueParsingReader
82
+ def initialize(io, error, opts)
83
+ if encoding = opts[:encoding]
84
+ io.set_encoding(encoding, :invalid => :replace, :undef => :replace) if io.respond_to?(:set_encoding)
85
+ end
86
+
87
+ # The csv module is a pure-Ruby implementation.
88
+ # So this may be slow on large datasets.
89
+ csv_opts = {
90
+ :col_sep => opts[:delimiter_expr],
91
+ :row_sep => $/,
92
+ :skip_blanks => true
93
+ }
94
+ csv_opts[:quote_char] = opts[:quote_char] if opts[:quote_char]
95
+ begin
96
+ require 'fastercsv'
97
+ @io = FasterCSV.new(io, csv_opts)
98
+ rescue LoadError => e
99
+ require 'csv'
100
+ @io = CSV.new(io, csv_opts)
101
+ end
102
+ @error = error
103
+ # @escape_char = opts[:escape_char]
104
+ end
105
+
106
+ def forward
107
+ @io.readline
108
+ end
109
+ end
84
110
 
85
111
  class JSONParser
86
112
  def initialize(reader, error, opts)
@@ -102,34 +128,68 @@ module TreasureData
102
128
  end
103
129
  end
104
130
 
105
- # TODO
106
- #class ApacheParser
107
- # REGEXP = /^([^ ]*) [^ ]* ([^ ]*) \[([^\]]*)\] "(\S+)(?: +([^ ]*) +\S*)?" ([^ ]*) ([^ ]*)(?: "([^\"]*)" "([^\"]*)")?$/
108
- #
109
- # def initialize(reader, error, opts)
110
- # @reader = reader
111
- # end
112
- #
113
- # def forward
114
- # while true
115
- # m = REGEXP.match(@reader.forward_row)
116
- # if m
117
- # h = {
118
- # 'host' => m[1],
119
- # 'user' => m[2],
120
- # 'time' => m[3],
121
- # 'method' => m[4],
122
- # 'path' => m[5],
123
- # 'code' => m[6],
124
- # 'size' => m[7].to_i,
125
- # 'referer' => m[8],
126
- # 'agent' => m[9],
127
- # }
128
- # return h
129
- # end
130
- # end
131
- # end
132
- #end
131
+ # TODO: Support user defined format like in_tail
132
+ module RegexpParserMixin
133
+ def initialize(reader, error, opts)
134
+ @reader = reader
135
+ @error = error
136
+ end
137
+
138
+ def forward
139
+ while true
140
+ line = @reader.forward_row
141
+ begin
142
+ m = @regexp.match(line)
143
+ unless m
144
+ @error.call("invalid #{@format} format", line)
145
+ next
146
+ end
147
+
148
+ return m.captures
149
+ rescue
150
+ @error.call("skipped: #{$!}", line)
151
+ next
152
+ end
153
+ end
154
+ end
155
+ end
156
+
157
+ # ApacheParser and SyslogParser are ports of the old table:import parsers
158
+
159
+ class ApacheParser
160
+ # Ruby 1.8 doesn't have named captures, so column names are needed.
161
+ COLUMNS = ['host', 'user', 'time', 'method', 'path', 'code', 'size', 'referer', 'agent']
162
+ TIME_FORMAT = "%d/%b/%Y:%H:%M:%S %z"
163
+
164
+ include RegexpParserMixin
165
+
166
+ def initialize(reader, error, opts)
167
+ super
168
+
169
+ # e.g. 127.0.0.1 - - [23/Oct/2011:08:20:01 -0700] "GET / HTTP/1.0" 200 492 "-" "Wget/1.12 (linux-gnu)"
170
+ @format = 'apache'
171
+ @regexp = /^([^ ]*) [^ ]* ([^ ]*) \[([^\]]*)\] "(\S+)(?: +([^ ]*) +\S*)?" ([^ ]*) ([^ ]*)(?: "([^\"]*)" "([^\"]*)")?$/
172
+ end
173
+ end
174
+
175
+ class SyslogParser
176
+ # This parser doesn't handle nil values.
177
+ # But the td platform removes keys with nil values during data import.
178
+ # So this is not critical in table:import.
179
+
180
+ COLUMNS = ['time', 'host', 'ident', 'pid', 'message']
181
+ TIME_FORMAT = "%b %d %H:%M:%S"
182
+
183
+ include RegexpParserMixin
184
+
185
+ def initialize(reader, error, opts)
186
+ super
187
+
188
+ # e.g. Dec 20 12:41:44 localhost kernel:10000 [4843680.692840] e1000e: eth2 NIC Link is Down
189
+ @format = 'syslog'
190
+ @regexp = /^([^ ]* [^ ]* [^ ]*) ([^ ]*) ([a-zA-Z0-9_\/\.\-]*)(?:\[([0-9]+)\])?[^\:]*\: *(.*)$/
191
+ end
192
+ end
133
193
 
134
194
  class AutoTypeConvertParserFilter
135
195
  def initialize(parser, error, opts)
@@ -173,7 +233,6 @@ module TreasureData
173
233
 
174
234
  class TimeParserFilter
175
235
  def initialize(parser, error, opts)
176
- require 'time'
177
236
  @parser = parser
178
237
  @error = error
179
238
  @time_column = opts[:time_column]
@@ -300,6 +359,7 @@ module TreasureData
300
359
  if s.to_i.to_s == s
301
360
  @opts[:time_value] = s.to_i
302
361
  else
362
+ require 'time'
303
363
  @opts[:time_value] = Time.parse(s).to_i
304
364
  end
305
365
  }
@@ -319,12 +379,17 @@ module TreasureData
319
379
  when 'tsv'
320
380
  @format = 'text'
321
381
  @opts[:delimiter_expr] = /\t/
322
- #when 'apache'
323
- # @format = 'apache'
324
- # @opts[:column_names] = ['host', 'user', 'time', 'method', 'path', 'code', 'size', 'referer', 'agent']
325
- # @opts[:null_expr] = /\A(?:\-|)\z/
326
- # @opts[:time_column] = 'time'
327
- # @opts[:time_format] = '%d/%b/%Y:%H:%M:%S %z'
382
+ when 'apache'
383
+ @format = name
384
+ @opts[:column_names] = ApacheParser::COLUMNS
385
+ @opts[:null_expr] = /\A(?:\-|)\z/
386
+ @opts[:time_column] = 'time'
387
+ @opts[:time_format] = ApacheParser::TIME_FORMAT
388
+ when 'syslog'
389
+ @format = name
390
+ @opts[:column_names] = SyslogParser::COLUMNS
391
+ @opts[:time_column] = 'time'
392
+ @opts[:time_format] = SyslogParser::TIME_FORMAT
328
393
  when 'msgpack'
329
394
  @format = 'msgpack'
330
395
  when 'json'
@@ -360,7 +425,30 @@ module TreasureData
360
425
  end
361
426
  }
362
427
 
363
- #when 'apache'
428
+ when 'apache', 'syslog'
429
+ Proc.new {|io,error|
430
+ io = DecompressIOFilter.filter(io, error, opts)
431
+ reader = LineReader.new(io, error, opts)
432
+ parser = if @format == 'apache'
433
+ ApacheParser.new(reader, error, opts)
434
+ else
435
+ SyslogParser.new(reader, error, opts)
436
+ end
437
+ if opts[:column_names]
438
+ column_names = opts[:column_names]
439
+ else
440
+ raise "--columns option is required"
441
+ end
442
+ unless opts[:all_string]
443
+ parser = AutoTypeConvertParserFilter.new(parser, error, opts)
444
+ end
445
+ parser = HashBuilder.new(parser, error, column_names)
446
+ if opts[:time_value]
447
+ parser = SetTimeParserFilter.new(parser, error, opts)
448
+ else
449
+ parser = TimeParserFilter.new(parser, error, opts)
450
+ end
451
+ }
364
452
 
365
453
  when 'json'
366
454
  Proc.new {|io,error|
@@ -1,5 +1,5 @@
1
1
  module TreasureData
2
2
 
3
- VERSION = '0.10.65'
3
+ VERSION = '0.10.66'
4
4
 
5
5
  end
@@ -0,0 +1,236 @@
1
+ require 'spec_helper'
2
+ require 'file_reader/shared_context'
3
+
4
+ require 'stringio'
5
+ require 'td/file_reader'
6
+
7
+ include TreasureData
8
+
9
+ describe 'FileReader filters' do
10
+ include_context 'error_proc'
11
+
12
+ let :delimiter do
13
+ "\t"
14
+ end
15
+
16
+ let :dataset do
17
+ [
18
+ ['hoge', 12345, true, 'null', Time.now.to_s],
19
+ ['foo', 34567, false, 'null', Time.now.to_s],
20
+ ['piyo', 56789, true, nil, Time.now.to_s],
21
+ ]
22
+ end
23
+
24
+ let :lines do
25
+ dataset.map { |data| data.map(&:to_s).join(delimiter) }
26
+ end
27
+
28
+ let :parser do
29
+ io = StringIO.new(lines.join("\n"))
30
+ reader = FileReader::LineReader.new(io, error, {})
31
+ FileReader::DelimiterParser.new(reader, error, :delimiter_expr => delimiter)
32
+ end
33
+
34
+ describe FileReader::AutoTypeConvertParserFilter do
35
+ let :options do
36
+ {
37
+ :null_expr => /\A(?:nil||\-|\\N)\z/i,
38
+ :true_expr => /\A(?:true)\z/i,
39
+ :false_expr => /\A(?:false)\z/i,
40
+ }
41
+ end
42
+
43
+ it 'initialize' do
44
+ filter = FileReader::AutoTypeConvertParserFilter.new(parser, error, options)
45
+ filter.should_not be_nil
46
+ end
47
+
48
+ context 'after initialization' do
49
+ let :filter do
50
+ FileReader::AutoTypeConvertParserFilter.new(parser, error, options)
51
+ end
52
+
53
+ it 'forward returns one converted line' do
54
+ filter.forward.should == dataset[0]
55
+ end
56
+
57
+ it 'feeds all lines' do
58
+ begin
59
+ i = 0
60
+ while line = filter.forward
61
+ line.should == dataset[i]
62
+ i += 1
63
+ end
64
+ rescue
65
+ end
66
+ end
67
+ end
68
+ end
69
+
70
+ describe FileReader::HashBuilder do
71
+ let :columns do
72
+ ['str', 'num', 'bool', 'null', 'log_at']
73
+ end
74
+
75
+ let :built_dataset do
76
+ # [{"str" => "hoge", "num" => "12345", "bool" => "true" , "null" =>"null", "log_at" => "2012-12-26 05:14:09 +0900"}, ...]
77
+ dataset.map { |data| Hash[columns.zip(data.map(&:to_s))]}
78
+ end
79
+
80
+ it 'initialize' do
81
+ builder = FileReader::HashBuilder.new(parser, error, columns)
82
+ builder.should_not be_nil
83
+ end
84
+
85
+ context 'after initialization' do
86
+ let :builder do
87
+ FileReader::HashBuilder.new(parser, error, columns)
88
+ end
89
+
90
+ it 'forward returns one converted line' do
91
+ builder.forward.should == built_dataset[0]
92
+ end
93
+
94
+ it 'feeds all lines' do
95
+ begin
96
+ i = 0
97
+ while line = builder.forward
98
+ line.should == built_dataset[i]
99
+ i += 1
100
+ end
101
+ rescue
102
+ end
103
+ end
104
+
105
+ describe FileReader::TimeParserFilter do
106
+ it "can't be initialized without :time_column option" do
107
+ expect {
108
+ FileReader::TimeParserFilter.new(parser, error, {})
109
+ }.to raise_error(Exception, /--time-column/)
110
+ end
111
+
112
+ it 'initialize' do
113
+ filter = FileReader::TimeParserFilter.new(builder, error, :time_column => 'log_at')
114
+ filter.should_not be_nil
115
+ end
116
+
117
+ context 'after initialization' do
118
+ let :timed_dataset do
119
+ require 'time'
120
+ built_dataset.each { |data| data['time'] = Time.parse(data['log_at']).to_i }
121
+ end
122
+
123
+ let :filter do
124
+ FileReader::TimeParserFilter.new(builder, error, :time_column => 'log_at')
125
+ end
126
+
127
+ it 'forward returns one parse line with parsed log_at' do
128
+ filter.forward.should == timed_dataset[0]
129
+ end
130
+
131
+ it 'feeds all lines' do
132
+ begin
133
+ i = 0
134
+ while line = filter.forward
135
+ line.should == timed_dataset[i]
136
+ i += 1
137
+ end
138
+ rescue
139
+ end
140
+ end
141
+
142
+ context 'missing log_at column lines' do
143
+ let :columns do
144
+ ['str', 'num', 'bool', 'null', 'created_at']
145
+ end
146
+
147
+ let :error_pattern do
148
+ /^time column 'log_at' is missing/
149
+ end
150
+
151
+ it 'feeds all lines' do
152
+ i = 0
153
+ begin
154
+ while line = filter.forward
155
+ i += 1
156
+ end
157
+ rescue RSpec::Expectations::ExpectationNotMetError => e
158
+ fail
159
+ rescue
160
+ i.should == 0
161
+ end
162
+ end
163
+ end
164
+
165
+ context 'invalid time format' do
166
+ let :error_pattern do
167
+ /^invalid time format/
168
+ end
169
+
170
+ [{:time_column => 'log_at', :time_format => "%d"},
171
+ {:time_column => 'str'}].each { |options|
172
+ let :filter do
173
+ FileReader::TimeParserFilter.new(builder, error, options)
174
+ end
175
+
176
+ it 'feeds all lines' do
177
+ i = 0
178
+ begin
179
+ while line = filter.forward
180
+ i += 1
181
+ end
182
+ rescue RSpec::Expectations::ExpectationNotMetError => e
183
+ fail
184
+ rescue
185
+ i.should == 0
186
+ end
187
+ end
188
+ }
189
+ end
190
+ end
191
+ end
192
+
193
+ describe FileReader::SetTimeParserFilter do
194
+ it "can't be initialized without :time_value option" do
195
+ expect {
196
+ FileReader::SetTimeParserFilter.new(parser, error, {})
197
+ }.to raise_error(Exception, /--time-value/)
198
+ end
199
+
200
+ it 'initialize' do
201
+ filter = FileReader::SetTimeParserFilter.new(builder, error, :time_value => Time.now.to_i)
202
+ filter.should_not be_nil
203
+ end
204
+
205
+ context 'after initialization' do
206
+ let :time_value do
207
+ Time.now.to_i
208
+ end
209
+
210
+ let :timed_dataset do
211
+ built_dataset.each { |data| data['time'] = time_value }
212
+ end
213
+
214
+ let :filter do
215
+ FileReader::SetTimeParserFilter.new(builder, error, :time_value => time_value)
216
+ end
217
+
218
+ it 'forward returns one converted line with time' do
219
+ filter.forward.should == timed_dataset[0]
220
+ end
221
+
222
+ it 'feeds all lines' do
223
+ begin
224
+ i = 0
225
+ while line = filter.forward
226
+ line.should == timed_dataset[i]
227
+ i += 1
228
+ end
229
+ rescue
230
+ end
231
+ end
232
+ end
233
+ end
234
+ end
235
+ end
236
+ end