td 0.10.39 → 0.10.40
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- data/ChangeLog +7 -0
- data/lib/td/command/bulk_import.rb +37 -2
- data/lib/td/command/list.rb +6 -2
- data/lib/td/file_reader.rb +20 -8
- data/lib/td/version.rb +1 -1
- metadata +1 -1
data/ChangeLog
CHANGED
@@ -1,4 +1,11 @@
|
|
1
1
|
|
2
|
+
== 2012-08-07 version 0.10.40
|
3
|
+
|
4
|
+
* Added bulk_import:delete_parts
|
5
|
+
* Hide obsoleted bulk_import:upload_part and :delete_part subcommands
|
6
|
+
* bulk_import:upload_parts supports --auto-perform option
|
7
|
+
|
8
|
+
|
2
9
|
== 2012-08-07 version 0.10.39
|
3
10
|
|
4
11
|
* Added bulk_import:prepare_parts and bulk_import:upload_parts subcommands
|
@@ -84,6 +84,7 @@ module Command
|
|
84
84
|
}
|
85
85
|
end
|
86
86
|
|
87
|
+
# obsoleted
|
87
88
|
def bulk_import_upload_part(op)
|
88
89
|
retry_limit = 10
|
89
90
|
retry_wait = 1
|
@@ -102,6 +103,7 @@ module Command
|
|
102
103
|
retry_wait = 1
|
103
104
|
suffix_count = 0
|
104
105
|
part_prefix = ""
|
106
|
+
auto_perform = false
|
105
107
|
|
106
108
|
op.on('-P', '--prefix NAME', 'add prefix to parts name') {|s|
|
107
109
|
part_prefix = s
|
@@ -109,6 +111,9 @@ module Command
|
|
109
111
|
op.on('-s', '--use-suffix COUNT', 'use COUNT number of . (dots) in the source file name to the parts name', Integer) {|i|
|
110
112
|
suffix_count = i
|
111
113
|
}
|
114
|
+
op.on('--auto-perform', 'perform bulk import job automatically', TrueClass) {|b|
|
115
|
+
auto_perform = b
|
116
|
+
}
|
112
117
|
|
113
118
|
name, *files = op.cmd_parse
|
114
119
|
|
@@ -125,8 +130,17 @@ module Command
|
|
125
130
|
}
|
126
131
|
|
127
132
|
$stderr.puts "done."
|
133
|
+
|
134
|
+
if auto_perform
|
135
|
+
client = get_client
|
136
|
+
job = client.perform_bulk_import(name)
|
137
|
+
|
138
|
+
$stderr.puts "Job #{job.job_id} is queued."
|
139
|
+
$stderr.puts "Use '#{$prog} job:show [-w] #{job.job_id}' to show the status."
|
140
|
+
end
|
128
141
|
end
|
129
142
|
|
143
|
+
# obsoleted
|
130
144
|
def bulk_import_delete_part(op)
|
131
145
|
name, part_name = op.cmd_parse
|
132
146
|
|
@@ -137,6 +151,27 @@ module Command
|
|
137
151
|
$stderr.puts "Part '#{part_name}' is deleted."
|
138
152
|
end
|
139
153
|
|
154
|
+
def bulk_import_delete_parts(op)
|
155
|
+
part_prefix = ""
|
156
|
+
|
157
|
+
op.on('-P', '--prefix NAME', 'add prefix to parts name') {|s|
|
158
|
+
part_prefix = s
|
159
|
+
}
|
160
|
+
|
161
|
+
name, *part_names = op.cmd_parse
|
162
|
+
|
163
|
+
client = get_client
|
164
|
+
|
165
|
+
part_names.each {|part_name|
|
166
|
+
part_name = part_prefix + part_name
|
167
|
+
|
168
|
+
$stderr.puts "Deleting '#{part_name}'..."
|
169
|
+
client.bulk_import_delete_part(name, part_name)
|
170
|
+
}
|
171
|
+
|
172
|
+
$stderr.puts "done."
|
173
|
+
end
|
174
|
+
|
140
175
|
def bulk_import_perform(op)
|
141
176
|
wait = false
|
142
177
|
force = false
|
@@ -172,7 +207,7 @@ module Command
|
|
172
207
|
job = client.perform_bulk_import(name)
|
173
208
|
|
174
209
|
$stderr.puts "Job #{job.job_id} is queued."
|
175
|
-
$stderr.puts "Use '#{$prog} job:show #{job.job_id}' to show the status."
|
210
|
+
$stderr.puts "Use '#{$prog} job:show [-w] #{job.job_id}' to show the status."
|
176
211
|
|
177
212
|
if wait
|
178
213
|
require 'td/command/job' # wait_job
|
@@ -237,7 +272,7 @@ module Command
|
|
237
272
|
|
238
273
|
PART_SPLIT_SIZE = 16*1024*1024
|
239
274
|
|
240
|
-
def
|
275
|
+
def bulk_import_prepare_parts(op)
|
241
276
|
outdir = nil
|
242
277
|
split_size_kb = PART_SPLIT_SIZE / 1024 # kb
|
243
278
|
|
data/lib/td/command/list.rb
CHANGED
@@ -128,6 +128,7 @@ module List
|
|
128
128
|
COMMAND = {}
|
129
129
|
GUESS = {}
|
130
130
|
HELP_EXCLUDE = [/^help/, /^account/, /^aggr/]
|
131
|
+
USAGE_EXCLUDE = [/bulk_import:upload_part\z/, /bulk_import:delete_part\z/]
|
131
132
|
|
132
133
|
def self.add_list(name, args, description, *examples)
|
133
134
|
LIST << COMMAND[name] = CommandOption.new(name, args, description, examples)
|
@@ -201,7 +202,9 @@ module List
|
|
201
202
|
|
202
203
|
msg = %[Additional commands, type "#{File.basename($0)} help COMMAND" for more details:\n\n]
|
203
204
|
ops.each {|op|
|
204
|
-
|
205
|
+
unless USAGE_EXCLUDE.any? {|pattern| pattern =~ op.name }
|
206
|
+
msg << %[ #{op.usage}\n]
|
207
|
+
end
|
205
208
|
}
|
206
209
|
msg << %[\n]
|
207
210
|
c.override_message = msg
|
@@ -228,10 +231,11 @@ module List
|
|
228
231
|
add_list 'bulk_import:list', %w[], 'List bulk import sessions', 'bulk_import:list'
|
229
232
|
add_list 'bulk_import:show', %w[name], 'Show list of uploaded parts', 'bulk_import:show'
|
230
233
|
add_list 'bulk_import:create', %w[name db table], 'Create a new bulk import session to the the table', 'bulk_import:create logs_201201 example_db event_logs'
|
231
|
-
add_list 'bulk_import:
|
234
|
+
add_list 'bulk_import:prepare_parts', %w[files_], 'Convert files into part file format', 'bulk_import:prepare_parts logs/*.csv --format csv --columns time,uid,price,count --time-column "time" -o parts/'
|
232
235
|
add_list 'bulk_import:upload_part', %w[name id path.msgpack.gz], 'Upload or re-upload a file into a bulk import session', 'bulk_import:upload_part logs_201201 01h data-201201-01.msgpack.gz'
|
233
236
|
add_list 'bulk_import:upload_parts', %w[name files_], 'Upload or re-upload files into a bulk import session', 'bulk_import:upload_parts parts/* --prefix logs_'
|
234
237
|
add_list 'bulk_import:delete_part', %w[name id], 'Delete a uploaded file from a bulk import session', 'bulk_import:delete_part logs_201201 01h'
|
238
|
+
add_list 'bulk_import:delete_parts', %w[name ids_], 'Delete uploaded files from a bulk import session', 'bulk_import:delete_parts logs_201201 01h 02h 03h'
|
235
239
|
add_list 'bulk_import:perform', %w[name], 'Start to validate and convert uploaded files', 'bulk_import:perform logs_201201'
|
236
240
|
add_list 'bulk_import:error_records', %w[name], 'Show records which did not pass validations', 'bulk_import:error_records logs_201201'
|
237
241
|
add_list 'bulk_import:commit', %w[name], 'Start to commit a performed bulk import session', 'bulk_import:commit logs_201201'
|
data/lib/td/file_reader.rb
CHANGED
@@ -35,9 +35,6 @@ module TreasureData
|
|
35
35
|
@reader = reader
|
36
36
|
@delimiter_expr = opts[:delimiter_expr]
|
37
37
|
@null_expr = opts[:null_expr]
|
38
|
-
# TODO
|
39
|
-
#@escape_char = opts[:escape_char]
|
40
|
-
#@quote_char = opts[:quote_char]
|
41
38
|
end
|
42
39
|
|
43
40
|
def next
|
@@ -49,6 +46,21 @@ module TreasureData
|
|
49
46
|
end
|
50
47
|
end
|
51
48
|
|
49
|
+
# TODO
|
50
|
+
#class QuotedDelimiterParsingReader
|
51
|
+
# def initialize(io, error, opts)
|
52
|
+
# require 'strscan'
|
53
|
+
# @io = io
|
54
|
+
# @error = error
|
55
|
+
# @delimiter_expr = opts[:delimiter_expr]
|
56
|
+
# @quote_char = opts[:quote_char]
|
57
|
+
# @escape_char = opts[:escape_char]
|
58
|
+
# end
|
59
|
+
|
60
|
+
# def next
|
61
|
+
# end
|
62
|
+
#end
|
63
|
+
|
52
64
|
class JSONParser
|
53
65
|
def initialize(reader, error, opts)
|
54
66
|
@reader = reader
|
@@ -68,6 +80,7 @@ module TreasureData
|
|
68
80
|
end
|
69
81
|
end
|
70
82
|
|
83
|
+
# TODO
|
71
84
|
#class ApacheParser
|
72
85
|
# REGEXP = /^([^ ]*) [^ ]* ([^ ]*) \[([^\]]*)\] "(\S+)(?: +([^ ]*) +\S*)?" ([^ ]*) ([^ ]*)(?: "([^\"]*)" "([^\"]*)")?$/
|
73
86
|
#
|
@@ -119,7 +132,6 @@ module TreasureData
|
|
119
132
|
|
120
133
|
def next
|
121
134
|
array = @parser.next
|
122
|
-
# FIXME error handling
|
123
135
|
Hash[@columns.zip(array)]
|
124
136
|
end
|
125
137
|
end
|
@@ -190,22 +202,22 @@ module TreasureData
|
|
190
202
|
attr_accessor :parser_class
|
191
203
|
|
192
204
|
def init_optparse(op)
|
193
|
-
op.on('-f', '--format NAME', "source file format") {|s|
|
205
|
+
op.on('-f', '--format NAME', "source file format [csv, tsv, msgpack, json]") {|s|
|
194
206
|
set_format_template(s)
|
195
207
|
}
|
196
|
-
op.on('-h', '--columns NAME,NAME,...', 'column names') {|s|
|
208
|
+
op.on('-h', '--columns NAME,NAME,...', 'column names (use --column-header instead if the first line has column names)') {|s|
|
197
209
|
@opts[:column_names] = s.split(',')
|
198
210
|
}
|
199
211
|
op.on('-H', '--column-header', 'first line includes column names', TrueClass) {|b|
|
200
212
|
@opts[:column_header] = b
|
201
213
|
}
|
202
|
-
op.on('-d', '--delimiter REGEX', "delimiter between columns (default: #{@default_opts[:delimiter_expr].inspect[1..-2]}") {|s|
|
214
|
+
op.on('-d', '--delimiter REGEX', "delimiter between columns (default: #{@default_opts[:delimiter_expr].inspect[1..-2]})") {|s|
|
203
215
|
@opts[:delimiter_expr] = Regexp.new(s)
|
204
216
|
}
|
205
217
|
#op.on('-D', '--line-delimiter REGEX', "delimiter between rows (default: #{@default_opts[:line_delimiter_expr].inspect[1..-2]})") {|s|
|
206
218
|
# @opts[:line_delimiter_expr] = Regexp.new(s)
|
207
219
|
#}
|
208
|
-
op.on('-N', '--null REGEX', "null expression (default: #{@default_opts[:null_expr].inspect[1..-2]}") {|s|
|
220
|
+
op.on('-N', '--null REGEX', "null expression (default: #{@default_opts[:null_expr].inspect[1..-2]})") {|s|
|
209
221
|
@opts[:null_expr] = Regexp.new(s)
|
210
222
|
}
|
211
223
|
# TODO
|
data/lib/td/version.rb
CHANGED