td 0.10.39 → 0.10.40
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +7 -0
- data/lib/td/command/bulk_import.rb +37 -2
- data/lib/td/command/list.rb +6 -2
- data/lib/td/file_reader.rb +20 -8
- data/lib/td/version.rb +1 -1
- metadata +1 -1
data/ChangeLog
CHANGED
@@ -1,4 +1,11 @@
|
|
1
1
|
|
2
|
+
== 2012-08-07 version 0.10.40
|
3
|
+
|
4
|
+
* Added bulk_import:delete_parts
|
5
|
+
* Hide obsoleted bulk_import:upload_part and :delete_part subcommands
|
6
|
+
* bulk_import:upload_parts supports --auto-perform option
|
7
|
+
|
8
|
+
|
2
9
|
== 2012-08-07 version 0.10.39
|
3
10
|
|
4
11
|
* Added bulk_import:prepare_parts and bulk_import:upload_parts subcommands
|
@@ -84,6 +84,7 @@ module Command
|
|
84
84
|
}
|
85
85
|
end
|
86
86
|
|
87
|
+
# obsoleted
|
87
88
|
def bulk_import_upload_part(op)
|
88
89
|
retry_limit = 10
|
89
90
|
retry_wait = 1
|
@@ -102,6 +103,7 @@ module Command
|
|
102
103
|
retry_wait = 1
|
103
104
|
suffix_count = 0
|
104
105
|
part_prefix = ""
|
106
|
+
auto_perform = false
|
105
107
|
|
106
108
|
op.on('-P', '--prefix NAME', 'add prefix to parts name') {|s|
|
107
109
|
part_prefix = s
|
@@ -109,6 +111,9 @@ module Command
|
|
109
111
|
op.on('-s', '--use-suffix COUNT', 'use COUNT number of . (dots) in the source file name to the parts name', Integer) {|i|
|
110
112
|
suffix_count = i
|
111
113
|
}
|
114
|
+
op.on('--auto-perform', 'perform bulk import job automatically', TrueClass) {|b|
|
115
|
+
auto_perform = b
|
116
|
+
}
|
112
117
|
|
113
118
|
name, *files = op.cmd_parse
|
114
119
|
|
@@ -125,8 +130,17 @@ module Command
|
|
125
130
|
}
|
126
131
|
|
127
132
|
$stderr.puts "done."
|
133
|
+
|
134
|
+
if auto_perform
|
135
|
+
client = get_client
|
136
|
+
job = client.perform_bulk_import(name)
|
137
|
+
|
138
|
+
$stderr.puts "Job #{job.job_id} is queued."
|
139
|
+
$stderr.puts "Use '#{$prog} job:show [-w] #{job.job_id}' to show the status."
|
140
|
+
end
|
128
141
|
end
|
129
142
|
|
143
|
+
# obsoleted
|
130
144
|
def bulk_import_delete_part(op)
|
131
145
|
name, part_name = op.cmd_parse
|
132
146
|
|
@@ -137,6 +151,27 @@ module Command
|
|
137
151
|
$stderr.puts "Part '#{part_name}' is deleted."
|
138
152
|
end
|
139
153
|
|
154
|
+
def bulk_import_delete_parts(op)
|
155
|
+
part_prefix = ""
|
156
|
+
|
157
|
+
op.on('-P', '--prefix NAME', 'add prefix to parts name') {|s|
|
158
|
+
part_prefix = s
|
159
|
+
}
|
160
|
+
|
161
|
+
name, *part_names = op.cmd_parse
|
162
|
+
|
163
|
+
client = get_client
|
164
|
+
|
165
|
+
part_names.each {|part_name|
|
166
|
+
part_name = part_prefix + part_name
|
167
|
+
|
168
|
+
$stderr.puts "Deleting '#{part_name}'..."
|
169
|
+
client.bulk_import_delete_part(name, part_name)
|
170
|
+
}
|
171
|
+
|
172
|
+
$stderr.puts "done."
|
173
|
+
end
|
174
|
+
|
140
175
|
def bulk_import_perform(op)
|
141
176
|
wait = false
|
142
177
|
force = false
|
@@ -172,7 +207,7 @@ module Command
|
|
172
207
|
job = client.perform_bulk_import(name)
|
173
208
|
|
174
209
|
$stderr.puts "Job #{job.job_id} is queued."
|
175
|
-
$stderr.puts "Use '#{$prog} job:show #{job.job_id}' to show the status."
|
210
|
+
$stderr.puts "Use '#{$prog} job:show [-w] #{job.job_id}' to show the status."
|
176
211
|
|
177
212
|
if wait
|
178
213
|
require 'td/command/job' # wait_job
|
@@ -237,7 +272,7 @@ module Command
|
|
237
272
|
|
238
273
|
PART_SPLIT_SIZE = 16*1024*1024
|
239
274
|
|
240
|
-
def
|
275
|
+
def bulk_import_prepare_parts(op)
|
241
276
|
outdir = nil
|
242
277
|
split_size_kb = PART_SPLIT_SIZE / 1024 # kb
|
243
278
|
|
data/lib/td/command/list.rb
CHANGED
@@ -128,6 +128,7 @@ module List
|
|
128
128
|
COMMAND = {}
|
129
129
|
GUESS = {}
|
130
130
|
HELP_EXCLUDE = [/^help/, /^account/, /^aggr/]
|
131
|
+
USAGE_EXCLUDE = [/bulk_import:upload_part\z/, /bulk_import:delete_part\z/]
|
131
132
|
|
132
133
|
def self.add_list(name, args, description, *examples)
|
133
134
|
LIST << COMMAND[name] = CommandOption.new(name, args, description, examples)
|
@@ -201,7 +202,9 @@ module List
|
|
201
202
|
|
202
203
|
msg = %[Additional commands, type "#{File.basename($0)} help COMMAND" for more details:\n\n]
|
203
204
|
ops.each {|op|
|
204
|
-
|
205
|
+
unless USAGE_EXCLUDE.any? {|pattern| pattern =~ op.name }
|
206
|
+
msg << %[ #{op.usage}\n]
|
207
|
+
end
|
205
208
|
}
|
206
209
|
msg << %[\n]
|
207
210
|
c.override_message = msg
|
@@ -228,10 +231,11 @@ module List
|
|
228
231
|
add_list 'bulk_import:list', %w[], 'List bulk import sessions', 'bulk_import:list'
|
229
232
|
add_list 'bulk_import:show', %w[name], 'Show list of uploaded parts', 'bulk_import:show'
|
230
233
|
add_list 'bulk_import:create', %w[name db table], 'Create a new bulk import session to the the table', 'bulk_import:create logs_201201 example_db event_logs'
|
231
|
-
add_list 'bulk_import:
|
234
|
+
add_list 'bulk_import:prepare_parts', %w[files_], 'Convert files into part file format', 'bulk_import:prepare_parts logs/*.csv --format csv --columns time,uid,price,count --time-column "time" -o parts/'
|
232
235
|
add_list 'bulk_import:upload_part', %w[name id path.msgpack.gz], 'Upload or re-upload a file into a bulk import session', 'bulk_import:upload_part logs_201201 01h data-201201-01.msgpack.gz'
|
233
236
|
add_list 'bulk_import:upload_parts', %w[name files_], 'Upload or re-upload files into a bulk import session', 'bulk_import:upload_parts parts/* --prefix logs_'
|
234
237
|
add_list 'bulk_import:delete_part', %w[name id], 'Delete a uploaded file from a bulk import session', 'bulk_import:delete_part logs_201201 01h'
|
238
|
+
add_list 'bulk_import:delete_parts', %w[name ids_], 'Delete uploaded files from a bulk import session', 'bulk_import:delete_parts logs_201201 01h 02h 03h'
|
235
239
|
add_list 'bulk_import:perform', %w[name], 'Start to validate and convert uploaded files', 'bulk_import:perform logs_201201'
|
236
240
|
add_list 'bulk_import:error_records', %w[name], 'Show records which did not pass validations', 'bulk_import:error_records logs_201201'
|
237
241
|
add_list 'bulk_import:commit', %w[name], 'Start to commit a performed bulk import session', 'bulk_import:commit logs_201201'
|
data/lib/td/file_reader.rb
CHANGED
@@ -35,9 +35,6 @@ module TreasureData
|
|
35
35
|
@reader = reader
|
36
36
|
@delimiter_expr = opts[:delimiter_expr]
|
37
37
|
@null_expr = opts[:null_expr]
|
38
|
-
# TODO
|
39
|
-
#@escape_char = opts[:escape_char]
|
40
|
-
#@quote_char = opts[:quote_char]
|
41
38
|
end
|
42
39
|
|
43
40
|
def next
|
@@ -49,6 +46,21 @@ module TreasureData
|
|
49
46
|
end
|
50
47
|
end
|
51
48
|
|
49
|
+
# TODO
|
50
|
+
#class QuotedDelimiterParsingReader
|
51
|
+
# def initialize(io, error, opts)
|
52
|
+
# require 'strscan'
|
53
|
+
# @io = io
|
54
|
+
# @error = error
|
55
|
+
# @delimiter_expr = opts[:delimiter_expr]
|
56
|
+
# @quote_char = opts[:quote_char]
|
57
|
+
# @escape_char = opts[:escape_char]
|
58
|
+
# end
|
59
|
+
|
60
|
+
# def next
|
61
|
+
# end
|
62
|
+
#end
|
63
|
+
|
52
64
|
class JSONParser
|
53
65
|
def initialize(reader, error, opts)
|
54
66
|
@reader = reader
|
@@ -68,6 +80,7 @@ module TreasureData
|
|
68
80
|
end
|
69
81
|
end
|
70
82
|
|
83
|
+
# TODO
|
71
84
|
#class ApacheParser
|
72
85
|
# REGEXP = /^([^ ]*) [^ ]* ([^ ]*) \[([^\]]*)\] "(\S+)(?: +([^ ]*) +\S*)?" ([^ ]*) ([^ ]*)(?: "([^\"]*)" "([^\"]*)")?$/
|
73
86
|
#
|
@@ -119,7 +132,6 @@ module TreasureData
|
|
119
132
|
|
120
133
|
def next
|
121
134
|
array = @parser.next
|
122
|
-
# FIXME error handling
|
123
135
|
Hash[@columns.zip(array)]
|
124
136
|
end
|
125
137
|
end
|
@@ -190,22 +202,22 @@ module TreasureData
|
|
190
202
|
attr_accessor :parser_class
|
191
203
|
|
192
204
|
def init_optparse(op)
|
193
|
-
op.on('-f', '--format NAME', "source file format") {|s|
|
205
|
+
op.on('-f', '--format NAME', "source file format [csv, tsv, msgpack, json]") {|s|
|
194
206
|
set_format_template(s)
|
195
207
|
}
|
196
|
-
op.on('-h', '--columns NAME,NAME,...', 'column names') {|s|
|
208
|
+
op.on('-h', '--columns NAME,NAME,...', 'column names (use --column-header instead if the first line has column names)') {|s|
|
197
209
|
@opts[:column_names] = s.split(',')
|
198
210
|
}
|
199
211
|
op.on('-H', '--column-header', 'first line includes column names', TrueClass) {|b|
|
200
212
|
@opts[:column_header] = b
|
201
213
|
}
|
202
|
-
op.on('-d', '--delimiter REGEX', "delimiter between columns (default: #{@default_opts[:delimiter_expr].inspect[1..-2]}") {|s|
|
214
|
+
op.on('-d', '--delimiter REGEX', "delimiter between columns (default: #{@default_opts[:delimiter_expr].inspect[1..-2]})") {|s|
|
203
215
|
@opts[:delimiter_expr] = Regexp.new(s)
|
204
216
|
}
|
205
217
|
#op.on('-D', '--line-delimiter REGEX', "delimiter between rows (default: #{@default_opts[:line_delimiter_expr].inspect[1..-2]})") {|s|
|
206
218
|
# @opts[:line_delimiter_expr] = Regexp.new(s)
|
207
219
|
#}
|
208
|
-
op.on('-N', '--null REGEX', "null expression (default: #{@default_opts[:null_expr].inspect[1..-2]}") {|s|
|
220
|
+
op.on('-N', '--null REGEX', "null expression (default: #{@default_opts[:null_expr].inspect[1..-2]})") {|s|
|
209
221
|
@opts[:null_expr] = Regexp.new(s)
|
210
222
|
}
|
211
223
|
# TODO
|
data/lib/td/version.rb
CHANGED