td 0.10.44 → 0.10.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +7 -0
- data/lib/td/file_reader.rb +90 -37
- data/lib/td/version.rb +1 -1
- metadata +2 -2
data/ChangeLog
CHANGED
@@ -1,4 +1,11 @@
|
|
1
1
|
|
2
|
+
== 2012-09-12 version 0.10.45
|
3
|
+
|
4
|
+
* bulk_import:prepare_parts supports --true and --false type conversion options
|
5
|
+
* bulk_import:prepare_parts supports --time-value option
|
6
|
+
* bulk_import:prepare_parts converts /null/i to NULL by default
|
7
|
+
|
8
|
+
|
2
9
|
== 2012-09-04 version 0.10.44
|
3
10
|
|
4
11
|
* Added password:change subcommand
|
data/lib/td/file_reader.rb
CHANGED
@@ -10,8 +10,8 @@ module TreasureData
|
|
10
10
|
@u = MessagePack::Unpacker.new(@io)
|
11
11
|
end
|
12
12
|
|
13
|
-
def
|
14
|
-
@u.
|
13
|
+
def forward
|
14
|
+
@u.forward
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
@@ -25,7 +25,7 @@ module TreasureData
|
|
25
25
|
@error = error
|
26
26
|
end
|
27
27
|
|
28
|
-
def
|
28
|
+
def forward_row
|
29
29
|
@io.readline($/).chomp
|
30
30
|
end
|
31
31
|
end
|
@@ -34,15 +34,11 @@ module TreasureData
|
|
34
34
|
def initialize(reader, error, opts)
|
35
35
|
@reader = reader
|
36
36
|
@delimiter_expr = opts[:delimiter_expr]
|
37
|
-
@null_expr = opts[:null_expr]
|
38
37
|
end
|
39
38
|
|
40
|
-
def
|
41
|
-
row = @reader.
|
39
|
+
def forward
|
40
|
+
row = @reader.forward_row
|
42
41
|
array = row.split(@delimiter_expr)
|
43
|
-
array.map! {|x|
|
44
|
-
@null_expr =~ x ? nil : x
|
45
|
-
}
|
46
42
|
end
|
47
43
|
end
|
48
44
|
|
@@ -57,7 +53,7 @@ module TreasureData
|
|
57
53
|
# @escape_char = opts[:escape_char]
|
58
54
|
# end
|
59
55
|
|
60
|
-
# def
|
56
|
+
# def forward
|
61
57
|
# end
|
62
58
|
#end
|
63
59
|
|
@@ -67,9 +63,9 @@ module TreasureData
|
|
67
63
|
@error = error
|
68
64
|
end
|
69
65
|
|
70
|
-
def
|
66
|
+
def forward
|
71
67
|
while true
|
72
|
-
line = @reader.
|
68
|
+
line = @reader.forward_row
|
73
69
|
begin
|
74
70
|
return JSON.parse(line)
|
75
71
|
rescue
|
@@ -88,9 +84,9 @@ module TreasureData
|
|
88
84
|
# @reader = reader
|
89
85
|
# end
|
90
86
|
#
|
91
|
-
# def
|
87
|
+
# def forward
|
92
88
|
# while true
|
93
|
-
# m = REGEXP.match(@reader.
|
89
|
+
# m = REGEXP.match(@reader.forward_row)
|
94
90
|
# if m
|
95
91
|
# h = {
|
96
92
|
# 'host' => m[1],
|
@@ -110,16 +106,29 @@ module TreasureData
|
|
110
106
|
#end
|
111
107
|
|
112
108
|
class AutoTypeConvertParserFilter
|
113
|
-
def initialize(parser, error)
|
109
|
+
def initialize(parser, error, opts)
|
114
110
|
@parser = parser
|
111
|
+
@null_expr = opts[:null_expr]
|
112
|
+
@true_expr = opts[:true_expr]
|
113
|
+
@false_expr = opts[:false_expr]
|
115
114
|
end
|
116
115
|
|
117
|
-
def
|
118
|
-
array = @parser.
|
116
|
+
def forward
|
117
|
+
array = @parser.forward
|
119
118
|
array.map! {|s|
|
120
|
-
|
121
|
-
|
122
|
-
|
119
|
+
if !s.is_a?(String)
|
120
|
+
s
|
121
|
+
elsif s =~ @null_expr
|
122
|
+
nil
|
123
|
+
elsif s =~ @true_expr
|
124
|
+
true
|
125
|
+
elsif s =~ @false_expr
|
126
|
+
false
|
127
|
+
else
|
128
|
+
# nil.to_i == 0 != nil.to_s
|
129
|
+
i = s.to_i
|
130
|
+
i.to_s == s ? i : s
|
131
|
+
end
|
123
132
|
}
|
124
133
|
end
|
125
134
|
end
|
@@ -130,8 +139,8 @@ module TreasureData
|
|
130
139
|
@columns = columns
|
131
140
|
end
|
132
141
|
|
133
|
-
def
|
134
|
-
array = @parser.
|
142
|
+
def forward
|
143
|
+
array = @parser.forward
|
135
144
|
Hash[@columns.zip(array)]
|
136
145
|
end
|
137
146
|
end
|
@@ -143,14 +152,14 @@ module TreasureData
|
|
143
152
|
@error = error
|
144
153
|
@time_column = opts[:time_column]
|
145
154
|
unless @time_column
|
146
|
-
raise '-
|
155
|
+
raise '--time-column or --time-value option is required'
|
147
156
|
end
|
148
157
|
@time_format = opts[:time_format]
|
149
158
|
end
|
150
159
|
|
151
|
-
def
|
160
|
+
def forward
|
152
161
|
while true
|
153
|
-
row = @parser.
|
162
|
+
row = @parser.forward
|
154
163
|
tval = row[@time_column]
|
155
164
|
|
156
165
|
unless tval
|
@@ -186,12 +195,31 @@ module TreasureData
|
|
186
195
|
end
|
187
196
|
end
|
188
197
|
|
198
|
+
class SetTimeParserFilter
|
199
|
+
def initialize(parser, error, opts)
|
200
|
+
@parser = parser
|
201
|
+
@error = error
|
202
|
+
@time_value = opts[:time_value]
|
203
|
+
unless @time_value
|
204
|
+
raise '--time-column or --time-value option is required'
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
def forward
|
209
|
+
row = @parser.forward
|
210
|
+
row['time'] = @time_value
|
211
|
+
row
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
189
215
|
def initialize
|
190
216
|
@format = "text"
|
191
217
|
@default_opts = {
|
192
218
|
:delimiter_expr => /\t|,/,
|
193
219
|
#:line_delimiter_expr => /\r?\n/,
|
194
|
-
:null_expr => /\A(
|
220
|
+
:null_expr => /\A(?:null||\-|\\N)\z/i,
|
221
|
+
:true_expr => /\A(?:true)\z/i,
|
222
|
+
:false_expr => /\A(?:false)\z/i,
|
195
223
|
#:quote_char => "\"",
|
196
224
|
}
|
197
225
|
@opts = {}
|
@@ -211,15 +239,21 @@ module TreasureData
|
|
211
239
|
op.on('-H', '--column-header', 'first line includes column names', TrueClass) {|b|
|
212
240
|
@opts[:column_header] = b
|
213
241
|
}
|
214
|
-
op.on('-d', '--delimiter REGEX', "delimiter between columns (default: #{@default_opts[:delimiter_expr].
|
242
|
+
op.on('-d', '--delimiter REGEX', "delimiter between columns (default: #{@default_opts[:delimiter_expr].to_s})") {|s|
|
215
243
|
@opts[:delimiter_expr] = Regexp.new(s)
|
216
244
|
}
|
217
245
|
#op.on('-D', '--line-delimiter REGEX', "delimiter between rows (default: #{@default_opts[:line_delimiter_expr].inspect[1..-2]})") {|s|
|
218
246
|
# @opts[:line_delimiter_expr] = Regexp.new(s)
|
219
247
|
#}
|
220
|
-
op.on('
|
248
|
+
op.on('--null REGEX', "null expression for the automatic type conversion (default: #{@default_opts[:null_expr].to_s})") {|s|
|
221
249
|
@opts[:null_expr] = Regexp.new(s)
|
222
250
|
}
|
251
|
+
op.on('--true REGEX', "true expression for the automatic type conversion (default: #{@default_opts[:true_expr].to_s})") {|s|
|
252
|
+
@opts[:true_expr] = Regexp.new(s)
|
253
|
+
}
|
254
|
+
op.on('--false REGEX', "false expression for the automatic type conversion (default: #{@default_opts[:false_expr].to_s})") {|s|
|
255
|
+
@opts[:false_expr] = Regexp.new(s)
|
256
|
+
}
|
223
257
|
# TODO
|
224
258
|
#op.on('-E', '--escape CHAR', "escape character (default: no escape character)") {|s|
|
225
259
|
# @opts[:escape_char] = s
|
@@ -230,12 +264,19 @@ module TreasureData
|
|
230
264
|
op.on('-S', '--all-string', 'disable automatic type conversion', TrueClass) {|b|
|
231
265
|
@opts[:all_string] = b
|
232
266
|
}
|
233
|
-
op.on('-t', '--time-column NAME', 'name of the time column
|
267
|
+
op.on('-t', '--time-column NAME', 'name of the time column') {|s|
|
234
268
|
@opts[:time_column] = s
|
235
269
|
}
|
236
270
|
op.on('-T', '--time-format FORMAT', 'strftime(3) format of the time column') {|s|
|
237
271
|
@opts[:time_format] = s
|
238
272
|
}
|
273
|
+
op.on('--time-value TIME', 'value of the time column') {|s|
|
274
|
+
if s.to_i.to_s == s
|
275
|
+
@opts[:time_value] = s.to_i
|
276
|
+
else
|
277
|
+
@opts[:time_value] = Time.parse(s).to_i
|
278
|
+
end
|
279
|
+
}
|
239
280
|
op.on('-e', '--encoding NAME', "text encoding") {|s|
|
240
281
|
@opts[:encoding] = s
|
241
282
|
}
|
@@ -275,17 +316,21 @@ module TreasureData
|
|
275
316
|
reader = LineReader.new(io, error, opts)
|
276
317
|
parser = DelimiterParser.new(reader, error, opts)
|
277
318
|
if opts[:column_header]
|
278
|
-
column_names = parser.
|
319
|
+
column_names = parser.forward
|
279
320
|
elsif opts[:column_names]
|
280
321
|
column_names = opts[:column_names]
|
281
322
|
else
|
282
323
|
raise "--column-header or --columns option is required"
|
283
324
|
end
|
284
325
|
unless opts[:all_string]
|
285
|
-
parser = AutoTypeConvertParserFilter.new(parser, error)
|
326
|
+
parser = AutoTypeConvertParserFilter.new(parser, error, opts)
|
286
327
|
end
|
287
328
|
parser = HashBuilder.new(parser, error, column_names)
|
288
|
-
|
329
|
+
if opts[:time_value]
|
330
|
+
parser = SetTimeParserFilter.new(parser, error, opts)
|
331
|
+
else
|
332
|
+
parser = TimeParserFilter.new(parser, error, opts)
|
333
|
+
end
|
289
334
|
}
|
290
335
|
|
291
336
|
#when 'apache'
|
@@ -295,28 +340,36 @@ module TreasureData
|
|
295
340
|
reader = LineReader.new(io, error, opts)
|
296
341
|
parser = JSONParser.new(reader, error, opts)
|
297
342
|
if opts[:column_header]
|
298
|
-
column_names = parser.
|
343
|
+
column_names = parser.forward
|
299
344
|
elsif opts[:column_names]
|
300
345
|
column_names = opts[:column_names]
|
301
346
|
end
|
302
347
|
if column_names
|
303
348
|
parser = HashBuilder.new(parser, error, column_names)
|
304
349
|
end
|
305
|
-
|
350
|
+
if opts[:time_value]
|
351
|
+
parser = SetTimeParserFilter.new(parser, error, opts)
|
352
|
+
else
|
353
|
+
parser = TimeParserFilter.new(parser, error, opts)
|
354
|
+
end
|
306
355
|
}
|
307
356
|
|
308
357
|
when 'msgpack'
|
309
358
|
Proc.new {|io,error|
|
310
359
|
parser = MessagePackParsingReader.new(io, error, opts)
|
311
360
|
if opts[:column_header]
|
312
|
-
column_names = parser.
|
361
|
+
column_names = parser.forward
|
313
362
|
elsif opts[:column_names]
|
314
363
|
column_names = opts[:column_names]
|
315
364
|
end
|
316
365
|
if column_names
|
317
366
|
parser = HashBuilder.new(parser, error, column_names)
|
318
367
|
end
|
319
|
-
|
368
|
+
if opts[:time_value]
|
369
|
+
parser = SetTimeParserFilter.new(parser, error, opts)
|
370
|
+
else
|
371
|
+
parser = TimeParserFilter.new(parser, error, opts)
|
372
|
+
end
|
320
373
|
}
|
321
374
|
end
|
322
375
|
end
|
@@ -325,7 +378,7 @@ module TreasureData
|
|
325
378
|
factory = compose_factory
|
326
379
|
parser = factory.call(io, error)
|
327
380
|
begin
|
328
|
-
while record = parser.
|
381
|
+
while record = parser.forward
|
329
382
|
block.call(record)
|
330
383
|
end
|
331
384
|
rescue EOFError
|
data/lib/td/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: td
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.
|
4
|
+
version: 0.10.45
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: msgpack
|