td 0.10.44 → 0.10.45
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +7 -0
- data/lib/td/file_reader.rb +90 -37
- data/lib/td/version.rb +1 -1
- metadata +2 -2
data/ChangeLog
CHANGED
@@ -1,4 +1,11 @@
|
|
1
1
|
|
2
|
+
== 2012-09-12 version 0.10.45
|
3
|
+
|
4
|
+
* bulk_import:prepare_parts supports --true and --false type conversion options
|
5
|
+
* bulk_import:prepare_parts supports --time-value option
|
6
|
+
* bulk_import:prepare_parts converts /null/i to NULL by default
|
7
|
+
|
8
|
+
|
2
9
|
== 2012-09-04 version 0.10.44
|
3
10
|
|
4
11
|
* Added password:change subcommand
|
data/lib/td/file_reader.rb
CHANGED
@@ -10,8 +10,8 @@ module TreasureData
|
|
10
10
|
@u = MessagePack::Unpacker.new(@io)
|
11
11
|
end
|
12
12
|
|
13
|
-
def
|
14
|
-
@u.
|
13
|
+
def forward
|
14
|
+
@u.forward
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
@@ -25,7 +25,7 @@ module TreasureData
|
|
25
25
|
@error = error
|
26
26
|
end
|
27
27
|
|
28
|
-
def
|
28
|
+
def forward_row
|
29
29
|
@io.readline($/).chomp
|
30
30
|
end
|
31
31
|
end
|
@@ -34,15 +34,11 @@ module TreasureData
|
|
34
34
|
def initialize(reader, error, opts)
|
35
35
|
@reader = reader
|
36
36
|
@delimiter_expr = opts[:delimiter_expr]
|
37
|
-
@null_expr = opts[:null_expr]
|
38
37
|
end
|
39
38
|
|
40
|
-
def
|
41
|
-
row = @reader.
|
39
|
+
def forward
|
40
|
+
row = @reader.forward_row
|
42
41
|
array = row.split(@delimiter_expr)
|
43
|
-
array.map! {|x|
|
44
|
-
@null_expr =~ x ? nil : x
|
45
|
-
}
|
46
42
|
end
|
47
43
|
end
|
48
44
|
|
@@ -57,7 +53,7 @@ module TreasureData
|
|
57
53
|
# @escape_char = opts[:escape_char]
|
58
54
|
# end
|
59
55
|
|
60
|
-
# def
|
56
|
+
# def forward
|
61
57
|
# end
|
62
58
|
#end
|
63
59
|
|
@@ -67,9 +63,9 @@ module TreasureData
|
|
67
63
|
@error = error
|
68
64
|
end
|
69
65
|
|
70
|
-
def
|
66
|
+
def forward
|
71
67
|
while true
|
72
|
-
line = @reader.
|
68
|
+
line = @reader.forward_row
|
73
69
|
begin
|
74
70
|
return JSON.parse(line)
|
75
71
|
rescue
|
@@ -88,9 +84,9 @@ module TreasureData
|
|
88
84
|
# @reader = reader
|
89
85
|
# end
|
90
86
|
#
|
91
|
-
# def
|
87
|
+
# def forward
|
92
88
|
# while true
|
93
|
-
# m = REGEXP.match(@reader.
|
89
|
+
# m = REGEXP.match(@reader.forward_row)
|
94
90
|
# if m
|
95
91
|
# h = {
|
96
92
|
# 'host' => m[1],
|
@@ -110,16 +106,29 @@ module TreasureData
|
|
110
106
|
#end
|
111
107
|
|
112
108
|
class AutoTypeConvertParserFilter
|
113
|
-
def initialize(parser, error)
|
109
|
+
def initialize(parser, error, opts)
|
114
110
|
@parser = parser
|
111
|
+
@null_expr = opts[:null_expr]
|
112
|
+
@true_expr = opts[:true_expr]
|
113
|
+
@false_expr = opts[:false_expr]
|
115
114
|
end
|
116
115
|
|
117
|
-
def
|
118
|
-
array = @parser.
|
116
|
+
def forward
|
117
|
+
array = @parser.forward
|
119
118
|
array.map! {|s|
|
120
|
-
|
121
|
-
|
122
|
-
|
119
|
+
if !s.is_a?(String)
|
120
|
+
s
|
121
|
+
elsif s =~ @null_expr
|
122
|
+
nil
|
123
|
+
elsif s =~ @true_expr
|
124
|
+
true
|
125
|
+
elsif s =~ @false_expr
|
126
|
+
false
|
127
|
+
else
|
128
|
+
# nil.to_i == 0 != nil.to_s
|
129
|
+
i = s.to_i
|
130
|
+
i.to_s == s ? i : s
|
131
|
+
end
|
123
132
|
}
|
124
133
|
end
|
125
134
|
end
|
@@ -130,8 +139,8 @@ module TreasureData
|
|
130
139
|
@columns = columns
|
131
140
|
end
|
132
141
|
|
133
|
-
def
|
134
|
-
array = @parser.
|
142
|
+
def forward
|
143
|
+
array = @parser.forward
|
135
144
|
Hash[@columns.zip(array)]
|
136
145
|
end
|
137
146
|
end
|
@@ -143,14 +152,14 @@ module TreasureData
|
|
143
152
|
@error = error
|
144
153
|
@time_column = opts[:time_column]
|
145
154
|
unless @time_column
|
146
|
-
raise '-
|
155
|
+
raise '--time-column or --time-value option is required'
|
147
156
|
end
|
148
157
|
@time_format = opts[:time_format]
|
149
158
|
end
|
150
159
|
|
151
|
-
def
|
160
|
+
def forward
|
152
161
|
while true
|
153
|
-
row = @parser.
|
162
|
+
row = @parser.forward
|
154
163
|
tval = row[@time_column]
|
155
164
|
|
156
165
|
unless tval
|
@@ -186,12 +195,31 @@ module TreasureData
|
|
186
195
|
end
|
187
196
|
end
|
188
197
|
|
198
|
+
class SetTimeParserFilter
|
199
|
+
def initialize(parser, error, opts)
|
200
|
+
@parser = parser
|
201
|
+
@error = error
|
202
|
+
@time_value = opts[:time_value]
|
203
|
+
unless @time_value
|
204
|
+
raise '--time-column or --time-value option is required'
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
def forward
|
209
|
+
row = @parser.forward
|
210
|
+
row['time'] = @time_value
|
211
|
+
row
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
189
215
|
def initialize
|
190
216
|
@format = "text"
|
191
217
|
@default_opts = {
|
192
218
|
:delimiter_expr => /\t|,/,
|
193
219
|
#:line_delimiter_expr => /\r?\n/,
|
194
|
-
:null_expr => /\A(
|
220
|
+
:null_expr => /\A(?:null||\-|\\N)\z/i,
|
221
|
+
:true_expr => /\A(?:true)\z/i,
|
222
|
+
:false_expr => /\A(?:false)\z/i,
|
195
223
|
#:quote_char => "\"",
|
196
224
|
}
|
197
225
|
@opts = {}
|
@@ -211,15 +239,21 @@ module TreasureData
|
|
211
239
|
op.on('-H', '--column-header', 'first line includes column names', TrueClass) {|b|
|
212
240
|
@opts[:column_header] = b
|
213
241
|
}
|
214
|
-
op.on('-d', '--delimiter REGEX', "delimiter between columns (default: #{@default_opts[:delimiter_expr].
|
242
|
+
op.on('-d', '--delimiter REGEX', "delimiter between columns (default: #{@default_opts[:delimiter_expr].to_s})") {|s|
|
215
243
|
@opts[:delimiter_expr] = Regexp.new(s)
|
216
244
|
}
|
217
245
|
#op.on('-D', '--line-delimiter REGEX', "delimiter between rows (default: #{@default_opts[:line_delimiter_expr].inspect[1..-2]})") {|s|
|
218
246
|
# @opts[:line_delimiter_expr] = Regexp.new(s)
|
219
247
|
#}
|
220
|
-
op.on('
|
248
|
+
op.on('--null REGEX', "null expression for the automatic type conversion (default: #{@default_opts[:null_expr].to_s})") {|s|
|
221
249
|
@opts[:null_expr] = Regexp.new(s)
|
222
250
|
}
|
251
|
+
op.on('--true REGEX', "true expression for the automatic type conversion (default: #{@default_opts[:true_expr].to_s})") {|s|
|
252
|
+
@opts[:true_expr] = Regexp.new(s)
|
253
|
+
}
|
254
|
+
op.on('--false REGEX', "false expression for the automatic type conversion (default: #{@default_opts[:false_expr].to_s})") {|s|
|
255
|
+
@opts[:false_expr] = Regexp.new(s)
|
256
|
+
}
|
223
257
|
# TODO
|
224
258
|
#op.on('-E', '--escape CHAR', "escape character (default: no escape character)") {|s|
|
225
259
|
# @opts[:escape_char] = s
|
@@ -230,12 +264,19 @@ module TreasureData
|
|
230
264
|
op.on('-S', '--all-string', 'disable automatic type conversion', TrueClass) {|b|
|
231
265
|
@opts[:all_string] = b
|
232
266
|
}
|
233
|
-
op.on('-t', '--time-column NAME', 'name of the time column
|
267
|
+
op.on('-t', '--time-column NAME', 'name of the time column') {|s|
|
234
268
|
@opts[:time_column] = s
|
235
269
|
}
|
236
270
|
op.on('-T', '--time-format FORMAT', 'strftime(3) format of the time column') {|s|
|
237
271
|
@opts[:time_format] = s
|
238
272
|
}
|
273
|
+
op.on('--time-value TIME', 'value of the time column') {|s|
|
274
|
+
if s.to_i.to_s == s
|
275
|
+
@opts[:time_value] = s.to_i
|
276
|
+
else
|
277
|
+
@opts[:time_value] = Time.parse(s).to_i
|
278
|
+
end
|
279
|
+
}
|
239
280
|
op.on('-e', '--encoding NAME', "text encoding") {|s|
|
240
281
|
@opts[:encoding] = s
|
241
282
|
}
|
@@ -275,17 +316,21 @@ module TreasureData
|
|
275
316
|
reader = LineReader.new(io, error, opts)
|
276
317
|
parser = DelimiterParser.new(reader, error, opts)
|
277
318
|
if opts[:column_header]
|
278
|
-
column_names = parser.
|
319
|
+
column_names = parser.forward
|
279
320
|
elsif opts[:column_names]
|
280
321
|
column_names = opts[:column_names]
|
281
322
|
else
|
282
323
|
raise "--column-header or --columns option is required"
|
283
324
|
end
|
284
325
|
unless opts[:all_string]
|
285
|
-
parser = AutoTypeConvertParserFilter.new(parser, error)
|
326
|
+
parser = AutoTypeConvertParserFilter.new(parser, error, opts)
|
286
327
|
end
|
287
328
|
parser = HashBuilder.new(parser, error, column_names)
|
288
|
-
|
329
|
+
if opts[:time_value]
|
330
|
+
parser = SetTimeParserFilter.new(parser, error, opts)
|
331
|
+
else
|
332
|
+
parser = TimeParserFilter.new(parser, error, opts)
|
333
|
+
end
|
289
334
|
}
|
290
335
|
|
291
336
|
#when 'apache'
|
@@ -295,28 +340,36 @@ module TreasureData
|
|
295
340
|
reader = LineReader.new(io, error, opts)
|
296
341
|
parser = JSONParser.new(reader, error, opts)
|
297
342
|
if opts[:column_header]
|
298
|
-
column_names = parser.
|
343
|
+
column_names = parser.forward
|
299
344
|
elsif opts[:column_names]
|
300
345
|
column_names = opts[:column_names]
|
301
346
|
end
|
302
347
|
if column_names
|
303
348
|
parser = HashBuilder.new(parser, error, column_names)
|
304
349
|
end
|
305
|
-
|
350
|
+
if opts[:time_value]
|
351
|
+
parser = SetTimeParserFilter.new(parser, error, opts)
|
352
|
+
else
|
353
|
+
parser = TimeParserFilter.new(parser, error, opts)
|
354
|
+
end
|
306
355
|
}
|
307
356
|
|
308
357
|
when 'msgpack'
|
309
358
|
Proc.new {|io,error|
|
310
359
|
parser = MessagePackParsingReader.new(io, error, opts)
|
311
360
|
if opts[:column_header]
|
312
|
-
column_names = parser.
|
361
|
+
column_names = parser.forward
|
313
362
|
elsif opts[:column_names]
|
314
363
|
column_names = opts[:column_names]
|
315
364
|
end
|
316
365
|
if column_names
|
317
366
|
parser = HashBuilder.new(parser, error, column_names)
|
318
367
|
end
|
319
|
-
|
368
|
+
if opts[:time_value]
|
369
|
+
parser = SetTimeParserFilter.new(parser, error, opts)
|
370
|
+
else
|
371
|
+
parser = TimeParserFilter.new(parser, error, opts)
|
372
|
+
end
|
320
373
|
}
|
321
374
|
end
|
322
375
|
end
|
@@ -325,7 +378,7 @@ module TreasureData
|
|
325
378
|
factory = compose_factory
|
326
379
|
parser = factory.call(io, error)
|
327
380
|
begin
|
328
|
-
while record = parser.
|
381
|
+
while record = parser.forward
|
329
382
|
block.call(record)
|
330
383
|
end
|
331
384
|
rescue EOFError
|
data/lib/td/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: td
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.
|
4
|
+
version: 0.10.45
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: msgpack
|