td 0.10.65 → 0.10.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog CHANGED
@@ -1,3 +1,10 @@
1
+
2
+ == 2013-01-16 version 0.10.66
3
+
4
+ * td-client v0.8.42
5
+ * query, sched:create, sched:update subcommands support -R, --retry option
6
+
7
+
1
8
  == 2012-12-27 version 0.10.65
2
9
 
3
10
  * td-client v0.8.40
@@ -206,8 +206,8 @@ module Command
206
206
  record = {}
207
207
 
208
208
  cap = m.captures
209
- @names.each_with_index {|name,i|
210
- if value = cap[i]
209
+ @names.each_with_index {|name,cap_i|
210
+ if value = cap[cap_i]
211
211
  if name == "time"
212
212
  value = parse_time(value).to_i
213
213
  end
@@ -275,7 +275,7 @@ module Command
275
275
 
276
276
  class MessagePackParser
277
277
  def initialize(time_key)
278
- require 'json'
278
+ require 'msgpack'
279
279
  @time_key = time_key
280
280
  end
281
281
 
@@ -119,6 +119,7 @@ module Command
119
119
  puts "Status : #{job.status}"
120
120
  puts "Type : #{job.type}"
121
121
  puts "Priority : #{job_priority_name_of(job.priority)}"
122
+ puts "Retry limit : #{job.retry_limit}"
122
123
  puts "Result : #{job.result_url}"
123
124
  puts "Database : #{job.db_name}"
124
125
  puts "Query : #{job.query}"
@@ -12,6 +12,7 @@ module Command
12
12
  result_user = nil
13
13
  result_ask_password = false
14
14
  priority = nil
15
+ retry_limit = nil
15
16
 
16
17
  op.on('-d', '--database DB_NAME', 'use the database (required)') {|s|
17
18
  db_name = s
@@ -46,6 +47,9 @@ module Command
46
47
  raise "unknown priority #{s.inspect} should be -2 (very-low), -1 (low), 0 (normal), 1 (high) or 2 (very-high)"
47
48
  end
48
49
  }
50
+ op.on('-R', '--retry COUNT', 'automatic retrying count', Integer) {|i|
51
+ retry_limit = i
52
+ }
49
53
 
50
54
  sql = op.cmd_parse
51
55
 
@@ -68,7 +72,7 @@ module Command
68
72
  # local existence check
69
73
  get_database(client, db_name)
70
74
 
71
- job = client.query(db_name, sql, result_url, priority)
75
+ job = client.query(db_name, sql, result_url, priority, retry_limit)
72
76
 
73
77
  $stderr.puts "Job #{job.job_id} is queued."
74
78
  $stderr.puts "Use '#{$prog} job:show #{job.job_id}' to show the status."
@@ -32,6 +32,7 @@ module Command
32
32
  result_user = nil
33
33
  result_ask_password = false
34
34
  priority = nil
35
+ retry_limit = nil
35
36
 
36
37
  op.on('-d', '--database DB_NAME', 'use the database (required)') {|s|
37
38
  db_name = s
@@ -57,6 +58,9 @@ module Command
57
58
  raise "unknown priority #{s.inspect} should be -2 (very-low), -1 (low), 0 (normal), 1 (high) or 2 (very-high)"
58
59
  end
59
60
  }
61
+ op.on('-R', '--retry COUNT', 'automatic retrying count', Integer) {|i|
62
+ retry_limit = i
63
+ }
60
64
 
61
65
  name, cron, sql = op.cmd_parse
62
66
 
@@ -76,7 +80,7 @@ module Command
76
80
  get_database(client, db_name)
77
81
 
78
82
  begin
79
- first_time = client.create_schedule(name, :cron=>cron, :query=>sql, :database=>db_name, :result=>result_url, :timezone=>timezone, :delay=>delay, :priority=>priority)
83
+ first_time = client.create_schedule(name, :cron=>cron, :query=>sql, :database=>db_name, :result=>result_url, :timezone=>timezone, :delay=>delay, :priority=>priority, :retry_limit=>retry_limit)
80
84
  rescue AlreadyExistsError
81
85
  cmd_debug_error $!
82
86
  $stderr.puts "Schedule '#{name}' already exists."
@@ -111,6 +115,7 @@ module Command
111
115
  timezone = nil
112
116
  delay = nil
113
117
  priority = nil
118
+ retry_limit = nil
114
119
 
115
120
  op.on('-s', '--schedule CRON', 'change the schedule') {|s|
116
121
  cron = s
@@ -136,6 +141,10 @@ module Command
136
141
  raise "unknown priority #{s.inspect} should be -2 (very-low), -1 (low), 0 (normal), 1 (high) or 2 (very-high)"
137
142
  end
138
143
  }
144
+ op.on('-R', '--retry COUNT', 'automatic retrying count', Integer) {|i|
145
+ retry_limit = i
146
+ }
147
+
139
148
 
140
149
  name = op.cmd_parse
141
150
 
@@ -147,6 +156,7 @@ module Command
147
156
  params['timezone'] = timezone if timezone
148
157
  params['delay'] = delay.to_s if delay
149
158
  params['priority'] = priority.to_s if priority
159
+ params['retry_limit'] = retry_limit.to_s if retry_limit
150
160
 
151
161
  if params.empty?
152
162
  $stderr.puts op.to_s
@@ -209,6 +219,7 @@ module Command
209
219
  puts "Next : #{s.next_time}"
210
220
  puts "Result : #{s.result_url}"
211
221
  puts "Priority : #{job_priority_name_of(s.priority)}"
222
+ puts "Retry limit : #{s.retry_limit}"
212
223
  puts "Database : #{s.database}"
213
224
  puts "Query : #{s.query}"
214
225
  end
@@ -1,6 +1,16 @@
1
1
 
2
2
  module TreasureData
3
+ # json and msgpack formats support array types with columns
4
+ #
5
+ # - when --column-header option
6
+ # ["a", "b", "c"] # first line is header
7
+ # ["v", 10, true] # array types, e.g. generate {"a" => "v", "b" => 10, "c" => true}
8
+ # ...
9
+ # - when --columns a,b,c
10
+ # ["v", 10, true] # array types
11
+ # ...
3
12
  class FileReader
13
+ require 'time'
4
14
  require 'zlib'
5
15
 
6
16
  class DecompressIOFilter
@@ -67,20 +77,36 @@ module TreasureData
67
77
  end
68
78
  end
69
79
 
70
- # TODO
71
- #class QuotedDelimiterParsingReader
72
- # def initialize(io, error, opts)
73
- # require 'strscan'
74
- # @io = io
75
- # @error = error
76
- # @delimiter_expr = opts[:delimiter_expr]
77
- # @quote_char = opts[:quote_char]
78
- # @escape_char = opts[:escape_char]
79
- # end
80
-
81
- # def forward
82
- # end
83
- #end
80
+ # TODO: encoding handling
81
+ class SeparatedValueParsingReader
82
+ def initialize(io, error, opts)
83
+ if encoding = opts[:encoding]
84
+ io.set_encoding(encoding, :invalid => :replace, :undef => :replace) if io.respond_to?(:set_encoding)
85
+ end
86
+
87
+ # The csv module is a pure Ruby implementation,
88
+ # so this may be slow on large datasets.
89
+ csv_opts = {
90
+ :col_sep => opts[:delimiter_expr],
91
+ :row_sep => $/,
92
+ :skip_blanks => true
93
+ }
94
+ csv_opts[:quote_char] = opts[:quote_char] if opts[:quote_char]
95
+ begin
96
+ require 'fastercsv'
97
+ @io = FasterCSV.new(io, csv_opts)
98
+ rescue LoadError => e
99
+ require 'csv'
100
+ @io = CSV.new(io, csv_opts)
101
+ end
102
+ @error = error
103
+ # @escape_char = opts[:escape_char]
104
+ end
105
+
106
+ def forward
107
+ @io.readline
108
+ end
109
+ end
84
110
 
85
111
  class JSONParser
86
112
  def initialize(reader, error, opts)
@@ -102,34 +128,68 @@ module TreasureData
102
128
  end
103
129
  end
104
130
 
105
- # TODO
106
- #class ApacheParser
107
- # REGEXP = /^([^ ]*) [^ ]* ([^ ]*) \[([^\]]*)\] "(\S+)(?: +([^ ]*) +\S*)?" ([^ ]*) ([^ ]*)(?: "([^\"]*)" "([^\"]*)")?$/
108
- #
109
- # def initialize(reader, error, opts)
110
- # @reader = reader
111
- # end
112
- #
113
- # def forward
114
- # while true
115
- # m = REGEXP.match(@reader.forward_row)
116
- # if m
117
- # h = {
118
- # 'host' => m[1],
119
- # 'user' => m[2],
120
- # 'time' => m[3],
121
- # 'method' => m[4],
122
- # 'path' => m[5],
123
- # 'code' => m[6],
124
- # 'size' => m[7].to_i,
125
- # 'referer' => m[8],
126
- # 'agent' => m[9],
127
- # }
128
- # return h
129
- # end
130
- # end
131
- # end
132
- #end
131
+ # TODO: Support user defined format like in_tail
132
+ module RegexpParserMixin
133
+ def initialize(reader, error, opts)
134
+ @reader = reader
135
+ @error = error
136
+ end
137
+
138
+ def forward
139
+ while true
140
+ line = @reader.forward_row
141
+ begin
142
+ m = @regexp.match(line)
143
+ unless m
144
+ @error.call("invalid #{@format} format", line)
145
+ next
146
+ end
147
+
148
+ return m.captures
149
+ rescue
150
+ @error.call("skipped: #{$!}", line)
151
+ next
152
+ end
153
+ end
154
+ end
155
+ end
156
+
157
+ # ApacheParser and SyslogParser are ports of the old table:import parsers
158
+
159
+ class ApacheParser
160
+ # Ruby 1.8 doesn't have named captures, so explicit column names are needed.
161
+ COLUMNS = ['host', 'user', 'time', 'method', 'path', 'code', 'size', 'referer', 'agent']
162
+ TIME_FORMAT = "%d/%b/%Y:%H:%M:%S %z"
163
+
164
+ include RegexpParserMixin
165
+
166
+ def initialize(reader, error, opts)
167
+ super
168
+
169
+ # e.g. 127.0.0.1 - - [23/Oct/2011:08:20:01 -0700] "GET / HTTP/1.0" 200 492 "-" "Wget/1.12 (linux-gnu)"
170
+ @format = 'apache'
171
+ @regexp = /^([^ ]*) [^ ]* ([^ ]*) \[([^\]]*)\] "(\S+)(?: +([^ ]*) +\S*)?" ([^ ]*) ([^ ]*)(?: "([^\"]*)" "([^\"]*)")?$/
172
+ end
173
+ end
174
+
175
+ class SyslogParser
176
+ # This parser doesn't handle nil values.
177
+ # However, the td platform removes keys that have a nil value during data import,
178
+ # so this is not critical for table:import.
179
+
180
+ COLUMNS = ['time', 'host', 'ident', 'pid', 'message']
181
+ TIME_FORMAT = "%b %d %H:%M:%S"
182
+
183
+ include RegexpParserMixin
184
+
185
+ def initialize(reader, error, opts)
186
+ super
187
+
188
+ # e.g. Dec 20 12:41:44 localhost kernel:10000 [4843680.692840] e1000e: eth2 NIC Link is Down
189
+ @format = 'syslog'
190
+ @regexp = /^([^ ]* [^ ]* [^ ]*) ([^ ]*) ([a-zA-Z0-9_\/\.\-]*)(?:\[([0-9]+)\])?[^\:]*\: *(.*)$/
191
+ end
192
+ end
133
193
 
134
194
  class AutoTypeConvertParserFilter
135
195
  def initialize(parser, error, opts)
@@ -173,7 +233,6 @@ module TreasureData
173
233
 
174
234
  class TimeParserFilter
175
235
  def initialize(parser, error, opts)
176
- require 'time'
177
236
  @parser = parser
178
237
  @error = error
179
238
  @time_column = opts[:time_column]
@@ -300,6 +359,7 @@ module TreasureData
300
359
  if s.to_i.to_s == s
301
360
  @opts[:time_value] = s.to_i
302
361
  else
362
+ require 'time'
303
363
  @opts[:time_value] = Time.parse(s).to_i
304
364
  end
305
365
  }
@@ -319,12 +379,17 @@ module TreasureData
319
379
  when 'tsv'
320
380
  @format = 'text'
321
381
  @opts[:delimiter_expr] = /\t/
322
- #when 'apache'
323
- # @format = 'apache'
324
- # @opts[:column_names] = ['host', 'user', 'time', 'method', 'path', 'code', 'size', 'referer', 'agent']
325
- # @opts[:null_expr] = /\A(?:\-|)\z/
326
- # @opts[:time_column] = 'time'
327
- # @opts[:time_format] = '%d/%b/%Y:%H:%M:%S %z'
382
+ when 'apache'
383
+ @format = name
384
+ @opts[:column_names] = ApacheParser::COLUMNS
385
+ @opts[:null_expr] = /\A(?:\-|)\z/
386
+ @opts[:time_column] = 'time'
387
+ @opts[:time_format] = ApacheParser::TIME_FORMAT
388
+ when 'syslog'
389
+ @format = name
390
+ @opts[:column_names] = SyslogParser::COLUMNS
391
+ @opts[:time_column] = 'time'
392
+ @opts[:time_format] = SyslogParser::TIME_FORMAT
328
393
  when 'msgpack'
329
394
  @format = 'msgpack'
330
395
  when 'json'
@@ -360,7 +425,30 @@ module TreasureData
360
425
  end
361
426
  }
362
427
 
363
- #when 'apache'
428
+ when 'apache', 'syslog'
429
+ Proc.new {|io,error|
430
+ io = DecompressIOFilter.filter(io, error, opts)
431
+ reader = LineReader.new(io, error, opts)
432
+ parser = if @format == 'apache'
433
+ ApacheParser.new(reader, error, opts)
434
+ else
435
+ SyslogParser.new(reader, error, opts)
436
+ end
437
+ if opts[:column_names]
438
+ column_names = opts[:column_names]
439
+ else
440
+ raise "--columns option is required"
441
+ end
442
+ unless opts[:all_string]
443
+ parser = AutoTypeConvertParserFilter.new(parser, error, opts)
444
+ end
445
+ parser = HashBuilder.new(parser, error, column_names)
446
+ if opts[:time_value]
447
+ parser = SetTimeParserFilter.new(parser, error, opts)
448
+ else
449
+ parser = TimeParserFilter.new(parser, error, opts)
450
+ end
451
+ }
364
452
 
365
453
  when 'json'
366
454
  Proc.new {|io,error|
@@ -1,5 +1,5 @@
1
1
  module TreasureData
2
2
 
3
- VERSION = '0.10.65'
3
+ VERSION = '0.10.66'
4
4
 
5
5
  end
@@ -0,0 +1,236 @@
1
+ require 'spec_helper'
2
+ require 'file_reader/shared_context'
3
+
4
+ require 'stringio'
5
+ require 'td/file_reader'
6
+
7
+ include TreasureData
8
+
9
+ describe 'FileReader filters' do
10
+ include_context 'error_proc'
11
+
12
+ let :delimiter do
13
+ "\t"
14
+ end
15
+
16
+ let :dataset do
17
+ [
18
+ ['hoge', 12345, true, 'null', Time.now.to_s],
19
+ ['foo', 34567, false, 'null', Time.now.to_s],
20
+ ['piyo', 56789, true, nil, Time.now.to_s],
21
+ ]
22
+ end
23
+
24
+ let :lines do
25
+ dataset.map { |data| data.map(&:to_s).join(delimiter) }
26
+ end
27
+
28
+ let :parser do
29
+ io = StringIO.new(lines.join("\n"))
30
+ reader = FileReader::LineReader.new(io, error, {})
31
+ FileReader::DelimiterParser.new(reader, error, :delimiter_expr => delimiter)
32
+ end
33
+
34
+ describe FileReader::AutoTypeConvertParserFilter do
35
+ let :options do
36
+ {
37
+ :null_expr => /\A(?:nil||\-|\\N)\z/i,
38
+ :true_expr => /\A(?:true)\z/i,
39
+ :false_expr => /\A(?:false)\z/i,
40
+ }
41
+ end
42
+
43
+ it 'initialize' do
44
+ filter = FileReader::AutoTypeConvertParserFilter.new(parser, error, options)
45
+ filter.should_not be_nil
46
+ end
47
+
48
+ context 'after initialization' do
49
+ let :filter do
50
+ FileReader::AutoTypeConvertParserFilter.new(parser, error, options)
51
+ end
52
+
53
+ it 'forward returns one converted line' do
54
+ filter.forward.should == dataset[0]
55
+ end
56
+
57
+ it 'feeds all lines' do
58
+ begin
59
+ i = 0
60
+ while line = filter.forward
61
+ line.should == dataset[i]
62
+ i += 1
63
+ end
64
+ rescue
65
+ end
66
+ end
67
+ end
68
+ end
69
+
70
+ describe FileReader::HashBuilder do
71
+ let :columns do
72
+ ['str', 'num', 'bool', 'null', 'log_at']
73
+ end
74
+
75
+ let :built_dataset do
76
+ # [{"str" => "hoge", "num" => "12345", "bool" => "true" , "null" =>"null", "log_at" => "2012-12-26 05:14:09 +0900"}, ...]
77
+ dataset.map { |data| Hash[columns.zip(data.map(&:to_s))]}
78
+ end
79
+
80
+ it 'initialize' do
81
+ builder = FileReader::HashBuilder.new(parser, error, columns)
82
+ builder.should_not be_nil
83
+ end
84
+
85
+ context 'after initialization' do
86
+ let :builder do
87
+ FileReader::HashBuilder.new(parser, error, columns)
88
+ end
89
+
90
+ it 'forward returns one converted line' do
91
+ builder.forward.should == built_dataset[0]
92
+ end
93
+
94
+ it 'feeds all lines' do
95
+ begin
96
+ i = 0
97
+ while line = builder.forward
98
+ line.should == built_dataset[i]
99
+ i += 1
100
+ end
101
+ rescue
102
+ end
103
+ end
104
+
105
+ describe FileReader::TimeParserFilter do
106
+ it "can't be initialized without :time_column option" do
107
+ expect {
108
+ FileReader::TimeParserFilter.new(parser, error, {})
109
+ }.to raise_error(Exception, /--time-column/)
110
+ end
111
+
112
+ it 'initialize' do
113
+ filter = FileReader::TimeParserFilter.new(builder, error, :time_column => 'log_at')
114
+ filter.should_not be_nil
115
+ end
116
+
117
+ context 'after initialization' do
118
+ let :timed_dataset do
119
+ require 'time'
120
+ built_dataset.each { |data| data['time'] = Time.parse(data['log_at']).to_i }
121
+ end
122
+
123
+ let :filter do
124
+ FileReader::TimeParserFilter.new(builder, error, :time_column => 'log_at')
125
+ end
126
+
127
+ it 'forward returns one parse line with parsed log_at' do
128
+ filter.forward.should == timed_dataset[0]
129
+ end
130
+
131
+ it 'feeds all lines' do
132
+ begin
133
+ i = 0
134
+ while line = filter.forward
135
+ line.should == timed_dataset[i]
136
+ i += 1
137
+ end
138
+ rescue
139
+ end
140
+ end
141
+
142
+ context 'missing log_at column lines' do
143
+ let :columns do
144
+ ['str', 'num', 'bool', 'null', 'created_at']
145
+ end
146
+
147
+ let :error_pattern do
148
+ /^time column 'log_at' is missing/
149
+ end
150
+
151
+ it 'feeds all lines' do
152
+ i = 0
153
+ begin
154
+ while line = filter.forward
155
+ i += 1
156
+ end
157
+ rescue RSpec::Expectations::ExpectationNotMetError => e
158
+ fail
159
+ rescue
160
+ i.should == 0
161
+ end
162
+ end
163
+ end
164
+
165
+ context 'invalid time format' do
166
+ let :error_pattern do
167
+ /^invalid time format/
168
+ end
169
+
170
+ [{:time_column => 'log_at', :time_format => "%d"},
171
+ {:time_column => 'str'}].each { |options|
172
+ let :filter do
173
+ FileReader::TimeParserFilter.new(builder, error, options)
174
+ end
175
+
176
+ it 'feeds all lines' do
177
+ i = 0
178
+ begin
179
+ while line = filter.forward
180
+ i += 1
181
+ end
182
+ rescue RSpec::Expectations::ExpectationNotMetError => e
183
+ fail
184
+ rescue
185
+ i.should == 0
186
+ end
187
+ end
188
+ }
189
+ end
190
+ end
191
+ end
192
+
193
+ describe FileReader::SetTimeParserFilter do
194
+ it "can't be initialized without :time_value option" do
195
+ expect {
196
+ FileReader::SetTimeParserFilter.new(parser, error, {})
197
+ }.to raise_error(Exception, /--time-value/)
198
+ end
199
+
200
+ it 'initialize' do
201
+ filter = FileReader::SetTimeParserFilter.new(builder, error, :time_value => Time.now.to_i)
202
+ filter.should_not be_nil
203
+ end
204
+
205
+ context 'after initialization' do
206
+ let :time_value do
207
+ Time.now.to_i
208
+ end
209
+
210
+ let :timed_dataset do
211
+ built_dataset.each { |data| data['time'] = time_value }
212
+ end
213
+
214
+ let :filter do
215
+ FileReader::SetTimeParserFilter.new(builder, error, :time_value => time_value)
216
+ end
217
+
218
+ it 'forward returns one converted line with time' do
219
+ filter.forward.should == timed_dataset[0]
220
+ end
221
+
222
+ it 'feeds all lines' do
223
+ begin
224
+ i = 0
225
+ while line = filter.forward
226
+ line.should == timed_dataset[i]
227
+ i += 1
228
+ end
229
+ rescue
230
+ end
231
+ end
232
+ end
233
+ end
234
+ end
235
+ end
236
+ end