td 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,84 @@
1
+
2
+ module TD
3
+ module Command
4
+
5
# Interactively configures the TreasureData account used by this tool.
#
# Takes an optional user name from the command line (prompting on stdin when
# it is absent), asks for the password with terminal echo switched off, and
# authenticates through API.authenticate. On success the user name and the
# API key are written to the config file at $TRD_CONFIG_PATH.
#
# Options:
#   -f, --force   overwrite an account that is already configured
#
# Exits with status 0 when an account is already configured (and -f was not
# given) or when an empty user name / password is entered. Returns nil
# without saving anything when both password attempts are rejected.
def account
  op = cmd_opt 'account', :user_name?

  op.banner << "\noptions:\n"

  force = false
  op.on('-f', '--force', 'overwrite current setting', TrueClass) {|b|
    force = true
  }

  user_name = op.cmd_parse

  require 'td/config'
  conf = nil
  begin
    conf = Config.read($TRD_CONFIG_PATH)
  rescue ConfigError
    # Treated as "nothing configured yet"; a new Config is created below.
  end

  if conf && conf['account.user'] && !force
    $stderr.puts "TreasureData account is already configured with '#{conf['account.user']}' account."
    $stderr.puts "Add '-f' option to overwrite this setting."
    exit 0
  end

  unless user_name
    print "User name: "
    line = STDIN.gets || ""
    user_name = line.strip
  end

  if user_name.empty?
    $stderr.puts "Canceled."
    exit 0
  end

  require 'td/api'

  api = nil

  # The user gets two attempts at the password.
  2.times do
    password = nil
    begin
      system "stty -echo"  # TODO termios
      print "Password: "
      # chomp removes only a trailing line terminator ("\n" or "\r\n"), so a
      # password that arrives without a newline keeps its last character.
      password = (STDIN.gets || "").chomp
    ensure
      # Always restore echo, even if reading the password was interrupted.
      system "stty echo"   # TODO termios
      print "\n"
    end

    if password.empty?
      $stderr.puts "Canceled."
      exit 0
    end

    begin
      api = API.authenticate(user_name, password)
    rescue TD::AuthError
      $stderr.puts "User name or password mismatched."
    end

    break if api
  end
  return unless api

  $stderr.puts "Authenticated successfully."

  conf ||= Config.new
  conf["account.user"] = user_name
  conf["account.apikey"] = api.apikey
  conf.save($TRD_CONFIG_PATH)

  $stderr.puts "Use '#{$prog} create-database <db_name>' to create a database."
end
81
+
82
+ end
83
+ end
84
+
@@ -0,0 +1,121 @@
1
+
2
+ module TD
3
+ module Command
4
+ private
5
# Builds the OptionParser for the sub-command +name+.
#
# +args+ names the positional arguments of the command:
#   * a name ending in "?" is optional and shown as [name] in the usage line
#   * a last name ending in "_" accepts any number of values and is shown
#     as <name...>
#   * every other name is required and shown as <name>
#
# The returned parser has two extra singleton methods:
#   cmd_usage(msg) - prints the usage (plus "error: msg" when msg is given)
#                    and exits with status 1
#   cmd_parse      - parses ARGV in place, checks the number of remaining
#                    arguments and returns ARGV[0] when at most one argument
#                    is left, otherwise ARGV itself
def cmd_opt(name, *args)
  multi = false
  if args.last.to_s =~ /_$/
    multi = true
    variadic = args.pop.to_s
    args.push variadic[0..-2]+'...'
  end

  req_args = args.select {|a| a.to_s !~ /\?$/ }
  opt_args = args.reject {|a| a.to_s !~ /\?$/ }.map {|a| a.to_s[0..-2].to_sym }
  args = req_args + opt_args

  usage_args = (req_args.map {|a| "<#{a}>" } + opt_args.map {|a| "[#{a}]" }).join(' ')

  description = List.get_description(name)
  indented = description.split("\n").map {|l| " #{l}" }.join("\n")

  op = OptionParser.new
  op.summary_indent = " "
  op.banner = "usage: #{$prog} #{name} #{usage_args}\n" \
              "\n" \
              "description:\n" \
              "#{indented}\n"

  min_argc = req_args.length
  max_argc = args.length

  # The helpers are defined on this parser instance only; the blocks close
  # over op, multi and the argument counts computed above.
  singleton = (class << op; self; end)

  singleton.send(:define_method, :cmd_usage) do |msg|
    puts op.to_s
    puts ""
    puts "error: #{msg}" if msg
    exit 1
  end

  singleton.send(:define_method, :cmd_parse) do
    begin
      parse!(ARGV)
      too_few = ARGV.length < min_argc
      too_many = !multi && ARGV.length > max_argc
      cmd_usage nil if too_few || too_many
      ARGV.length <= 1 ? ARGV[0] : ARGV
    rescue
      # Option parsing errors are reported through the usage screen.
      cmd_usage $!
    end
  end

  op
end
58
+
59
# Loads and returns the configuration stored at $TRD_CONFIG_PATH.
# The config library is required lazily, on first use.
def cmd_config
  require 'td/config'
  config_path = $TRD_CONFIG_PATH
  Config.read(config_path)
end
63
+
64
# Returns an API client built from the 'account.apikey' entry of +conf+.
# Raises ConfigError when no API key has been configured.
def cmd_api(conf)
  apikey = conf['account.apikey']
  raise ConfigError, "Account is not configured." unless apikey

  require 'td/api'
  API.new(apikey)
end
72
+
73
# Renders +rows+ with Hirb's table helper and returns whatever the helper
# returns. Extra arguments are forwarded unchanged. hirb is required lazily.
def cmd_render_table(rows, *opts)
  require 'hirb'
  renderer = Hirb::Helpers::Table
  renderer.render(rows, *opts)
end
77
+
78
# Renders +nodes+ with Hirb's tree helper and returns whatever the helper
# returns. Extra arguments are forwarded unchanged. hirb is required lazily.
def cmd_render_tree(nodes, *opts)
  require 'hirb'
  renderer = Hirb::Helpers::Tree
  renderer.render(nodes, *opts)
end
82
+
83
# Prints the class, message and backtrace of +ex+ to stderr when $verbose is
# set; does nothing otherwise.
#
# The exception that was passed in is the one reported (not the global $!),
# so the helper also works outside of a rescue clause. An exception that was
# never raised has no backtrace; it is reported without one.
def cmd_debug_error(ex)
  return unless $verbose

  $stderr.puts "error: #{ex.class}: #{ex.to_s}"
  (ex.backtrace || []).each {|b|
    $stderr.puts " #{b}"
  }
  $stderr.puts ""
end
92
+
93
# Looks up the database +db_name+ through +api+ and returns it.
#
# When the lookup raises, the error is reported on stderr (with a debug dump
# via cmd_debug_error), a hint about 'show-databases' is printed, and the
# process exits with status 1.
def find_database(api, db_name)
  api.database(db_name)
rescue
  cmd_debug_error $!
  $stderr.puts $!
  $stderr.puts "Use '#{$prog} show-databases' to show the list of databases."
  exit 1
end
104
+
105
# Looks up +table_name+ in the database +db_name+ and returns the table.
#
# The database is resolved with find_database. When the table lookup raises,
# the error is reported on stderr (with a debug dump via cmd_debug_error,
# as find_database does), a hint about 'show-tables' is printed, and the
# process exits with status 1.
#
# When +type+ is given and differs from the table's type, a message is
# printed to stderr and the table is still returned.
# NOTE(review): the mismatch is only a warning; confirm whether callers such
# as import expect the command to stop here.
def find_table(api, db_name, table_name, type=nil)
  db = find_database(api, db_name)
  begin
    table = db.table(table_name)
  rescue
    cmd_debug_error $!
    $stderr.puts $!
    $stderr.puts "Use '#{$prog} show-tables #{db_name}' to show the list of tables."
    exit 1
  end
  if type && table.type != type
    $stderr.puts "Table '#{db_name}.#{table_name}' is not a #{type} table but a #{table.type} table"
  end
  table
end
119
+ end
120
+ end
121
+
@@ -0,0 +1,82 @@
1
+
2
+ module TD
3
+ module Command
4
+
5
# 'create-database <db_name>' command: creates the named database.
#
# Exits with status 1 when the database already exists; otherwise prints a
# confirmation and a hint about creating a table to stderr.
def create_database
  db_name = cmd_opt('create-database', :db_name).cmd_parse

  api = cmd_api(cmd_config)

  begin
    api.create_database(db_name)
  rescue AlreadyExistsError
    $stderr.puts "Database '#{db_name}' already exists."
    exit 1
  end

  $stderr.puts "Database '#{db_name}' is created.",
               "Use '#{$prog} create-log-table #{db_name} <table_name>' to create a table."
end
22
+
23
# 'drop-database <db_name>' command: deletes the named database.
#
# Options:
#   -f, --force   delete the database even when it still contains tables
#
# Exits with status 1 when the database does not exist, or when it still has
# tables and -f was not given. Prints a confirmation to stderr on success.
def drop_database
  op = cmd_opt 'drop-database', :db_name

  op.banner << "\noptions:\n"

  force = false
  op.on('-f', '--force', 'clear tables and delete the database', TrueClass) {|b|
    force = true
  }

  db_name = op.cmd_parse

  conf = cmd_config
  api = cmd_api(conf)

  begin
    db = api.database(db_name)

    if !force && !db.tables.empty?
      $stderr.puts "Database '#{db_name}' is not empty. Use '-f' option or drop tables first."
      exit 1
    end

    db.delete
  rescue NotFoundError
    $stderr.puts "Database '#{db_name}' does not exist."
    exit 1
  end

  $stderr.puts "Database '#{db_name}' is deleted."
end
54
+
55
# 'show-databases' command: prints a one-column table of database names to
# stdout. When there are no databases, a hint about 'create-database' is
# additionally printed to stderr.
def show_databases
  cmd_opt('show-databases').cmd_parse

  api = cmd_api(cmd_config)
  dbs = api.databases

  rows = dbs.map {|db| {:Name => db.name} }
  puts cmd_render_table(rows, :fields => [:Name])

  return unless dbs.empty?

  $stderr.puts "There are no databases."
  $stderr.puts "Use '#{$prog} create-database <db_name>' to create a database."
end
75
+
76
# Short names for the database commands.
alias_method :show_dbs, :show_databases
alias_method :create_db, :create_database
alias_method :drop_db, :drop_database
79
+
80
+ end
81
+ end
82
+
@@ -0,0 +1,286 @@
1
+
2
+ module TD
3
+ module Command
4
+
5
# Built-in text log formats of the import command, keyed by format name.
# Each entry is a triple:
#   [regexp matching one log line,
#    field names for the regexp's captures (in capture order),
#    strptime format used for the 'time' capture]
IMPORT_TEMPLATES = {
  'apache' => [
    /^([^ ]*) [^ ]* ([^ ]*) \[([^\]]*)\] "(\S+)(?: +([^ ]*) +\S*)?" ([^ ]*) ([^ ]*)(?: "([^\"]*)" "([^\"]*)")?$/,
    %w[host user time method path code size referer agent],
    "%d/%b/%Y:%H:%M:%S %z"],
  'syslog' => [
    /^([^ ]* [^ ]* [^ ]*) ([^ ]*) ([a-zA-Z0-9_\/\.\-]*)(?:\[([0-9]+)\])?[^\:]*\: *(.*)$/,
    %w[time host ident pid message],
    "%b %d %H:%M:%S"],
}
15
+
16
+ # TODO import-item
17
+ # TODO tail
18
+
19
# 'import <db_name> <table_name> <files...>' command: parses log files and
# uploads their records to a table.
#
# Options:
#   --format FORMAT          apache (default), syslog, msgpack or json
#   --apache/--syslog/--msgpack/--json   shortcuts for --format
#   -t, --time-key COL_NAME  column holding the record time; required for
#                            the json and msgpack formats
#
# A file named '-' stands for stdin and names ending in '.gz' are read
# through Zlib::GzipReader. All inputs are opened before the first one is
# imported and are closed again when the command finishes or fails (stdin
# is left open).
#
# Exits with status 1 for an unknown format or a missing --time-key.
def import
  op = cmd_opt 'import', :db_name, :table_name, :files_

  op.banner << "\nsupported formats:\n"
  op.banner << " apache\n"
  op.banner << " syslog\n"
  op.banner << " msgpack\n"
  op.banner << " json\n"

  op.banner << "\noptions:\n"

  format = 'apache'
  time_key = nil

  op.on('--format FORMAT', "file format (default: #{format})") {|s|
    format = s
  }

  op.on('--apache', "same as --format apache; apache common log format") {
    format = 'apache'
  }

  op.on('--syslog', "same as --format syslog; syslog") {
    format = 'syslog'
  }

  op.on('--msgpack', "same as --format msgpack; msgpack stream format") {
    format = 'msgpack'
  }

  op.on('--json', "same as --format json; LF-separated json format") {
    format = 'json'
  }

  op.on('-t', '--time-key COL_NAME', "time key name for json and msgpack format (e.g. 'created_at')") {|s|
    time_key = s
  }

  db_name, table_name, *paths = op.cmd_parse

  conf = cmd_config
  api = cmd_api(conf)

  case format
  when 'json', 'msgpack'
    unless time_key
      $stderr.puts "-t, --time-key COL_NAME (e.g. '-t created_at') parameter is required for #{format} format"
      exit 1
    end
    if format == 'json'
      require 'json'
      require 'time'
      parser = JsonParser.new(time_key)
    else
      parser = MessagePackParser.new(time_key)
    end

  else
    # Every other format name must be one of the regexp-based templates.
    regexp, names, time_format = IMPORT_TEMPLATES[format]
    if !regexp || !names || !time_format
      $stderr.puts "Unknown format '#{format}'"
      exit 1
    end
    parser = TextParser.new(names, regexp, time_format)
  end

  find_table(api, db_name, table_name, :log)

  require 'zlib'
  require 'msgpack'
  require 'tempfile'
  #require 'thread'

  files = []
  begin
    paths.each {|path|
      file = case path
             when '-' then $stdin
             when /\.gz$/ then Zlib::GzipReader.open(path)
             else File.open(path)
             end
      files << file
    }

    files.zip(paths).each {|file,path|
      import_log_file(file, path, api, db_name, table_name, parser)
    }
  ensure
    # Release every input opened above; stdin is not ours to close.
    files.each {|file|
      next if file.equal?($stdin)
      file.close unless file.closed?
    }
  end

  puts "done."
end
109
+
110
+ private
111
# Imports the records of one input into +db_name+.+table_name+.
#
# +parser+ is called with (file, path) and a block; every record it yields
# is serialized with to_msgpack and appended to a gzip-compressed temporary
# file. The temporary file is uploaded with api.import as "msgpack.gz"
# whenever it grows beyond 1 MiB, and once more at the end when records are
# still pending. Progress is reported on stdout.
#
# Returns 1 when an intermediate upload fails (the error is printed to
# stderr) and nil otherwise. An error raised by the final upload propagates.
# The temporary file is always closed and removed from disk before
# returning.
def import_log_file(file, path, api, db_name, table_name, parser)
  puts "importing #{path}..."

  out = Tempfile.new('td-import')
  writer = Zlib::GzipWriter.new(out)

  # Finishes the current gzip stream and uploads the temp file's contents.
  # (finish closes the gzip writer only; the temp file stays open.)
  upload = lambda {
    writer.finish
    size = out.pos
    out.pos = 0

    puts " uploading #{size} bytes..."
    # TODO upload on background thread
    api.import(db_name, table_name, "msgpack.gz", out, size)
  }

  n = 0  # records read from this input so far
  x = 0  # records written since the last upload
  parser.call(file, path) {|record|
    writer.write record.to_msgpack

    n += 1
    x += 1
    if n % 10000 == 0
      puts " imported #{n} entries from #{path}..."

    elsif out.pos > 1024*1024 # TODO size
      puts " imported #{n} entries from #{path}..."
      begin
        upload.call

        # Start over with an empty temp file and a fresh gzip stream.
        out.truncate(0)
        out.pos = 0
        x = 0
        writer = Zlib::GzipWriter.new(out)
      rescue
        $stderr.puts " #{$!}"
        return 1 # TODO error
      end
    end
  }

  upload.call if x != 0

  puts " imported #{n} entries from #{path}."

ensure
  # Close the gzip stream before the file it writes to; a stream that was
  # already finished by an upload is skipped.
  begin
    writer.close if writer && !writer.closed?
  rescue StandardError
    # best-effort cleanup
  end
  begin
    # close(true) also unlinks the temp file instead of leaving it on disk.
    out.close(true) if out
  rescue StandardError
    # best-effort cleanup
  end
end
164
+
165
+ require 'date' # DateTime#strptime
166
+ require 'time' # Time#strptime, Time#parse
167
+
168
# Parses line-oriented text logs with a regular expression.
#
# +names+ are the record keys for the regexp's captures, in capture order;
# the capture named "time" is parsed with +time_format+ and stored as an
# integer number of seconds since the epoch.
class TextParser
  def initialize(names, regexp, time_format)
    @names = names
    @regexp = regexp
    @time_format = time_format
  end

  # Reads +file+ line by line and passes one record (a Hash of field name to
  # value) per matching line to the block. Captures that did not take part
  # in the match are left out of the record.
  #
  # Any error raised while handling a line -- including one raised by the
  # block -- is reported on stderr and that line is skipped. +path+ is only
  # used in the "invalid log format" message.
  def call(file, path, &block)
    line_no = 0
    file.each_line {|line|
      line_no += 1
      begin
        m = @regexp.match(line)
        unless m
          raise "invalid log format at #{path}:#{line_no}"
        end

        record = {}

        cap = m.captures
        # The index gets its own name so it can never clobber the line
        # counter above.
        @names.each_with_index {|name,idx|
          value = cap[idx]
          next unless value

          value = parse_time(value).to_i if name == "time"
          record[name] = value
        }

        block.call(record)

      rescue
        $stderr.puts " skipped: #{$!}: #{line.dump}"
      end
    }
  end

  # Time.strptime is used when the running Ruby provides it; otherwise the
  # value is parsed through DateTime.strptime.
  if Time.respond_to?(:strptime)
    def parse_time(value)
      Time.strptime(value, @time_format)
    end
  else
    def parse_time(value)
      Time.parse(DateTime.strptime(value, @time_format).to_s)
    end
  end
end
215
+
216
# Parses input that holds one JSON document per line.
#
# The value found under +time_key+ becomes the record's 'time' column as an
# integer number of seconds since the epoch.
class JsonParser
  def initialize(time_key)
    require 'json'
    @time_key = time_key
  end

  # Reads +file+ line by line and passes each parsed record to the block.
  #
  # Numeric time values are truncated to whole seconds; any other value is
  # converted to a string and parsed with Time.parse. A line that cannot be
  # parsed, lacks the time key, or makes the block raise is reported on
  # stderr and skipped. +path+ is accepted for interface compatibility with
  # the other parsers and is not used.
  def call(file, path, &block)
    file.each_line {|line|
      begin
        record = JSON.parse(line)

        time = record[@time_key]
        unless time
          raise "record doesn't have '#{@time_key}' column"
        end

        # Epoch values may arrive as floats (e.g. 1300000000.5); handing
        # those to Time.parse as text would reject or misread them.
        time = case time
               when Numeric then time.to_i
               else Time.parse(time.to_s).to_i
               end
        record['time'] = time

        block.call(record)

      rescue
        $stderr.puts " skipped: #{$!}: #{line.dump}"
      end
    }
  end
end
250
+
251
# Parses a stream of MessagePack-encoded records.
#
# The value found under +time_key+ becomes the record's 'time' column as an
# integer number of seconds since the epoch.
class MessagePackParser
  def initialize(time_key)
    require 'json'  # used to print skipped records
    @time_key = time_key
  end

  # Unpacks records from +file+ with MessagePack::Unpacker and passes each
  # one to the block.
  #
  # Numeric time values are truncated to whole seconds; any other value is
  # converted to a string and parsed with Time.parse. A record that lacks
  # the time key, has an unparsable time, or makes the block raise is
  # reported on stderr and skipped. An EOFError raised while unpacking ends
  # the import quietly. +path+ is accepted for interface compatibility with
  # the other parsers and is not used.
  def call(file, path, &block)
    MessagePack::Unpacker.new(file).each {|record|
      begin
        time = record[@time_key]
        unless time
          raise "record doesn't have '#{@time_key}' column"
        end

        # Epoch values may arrive as floats; handing those to Time.parse as
        # text would reject or misread them.
        time = case time
               when Numeric then time.to_i
               else Time.parse(time.to_s).to_i
               end
        record['time'] = time

        block.call(record)

      rescue
        $stderr.puts " skipped: #{$!}: #{record.to_json}"
      end
    }
  rescue EOFError
  end
end
284
+ end
285
+ end
286
+