iij-dag 1.0.1

@@ -0,0 +1,30 @@
+ module Dag
+   module CLI
+     class Import
+       def initialize(client, db, table, files, format, params)
+         @client = client
+         @db = db
+         @table = table
+         @files = files
+         @params = params
+         @format = format
+       end
+
+       def execute
+         begin
+           @client.database(@db)
+         rescue Dag::Client::DatabaseNotFound
+           @client.create_database(@db)
+         end
+
+         begin
+           @client.database(@db).table(@table)
+         rescue Dag::Client::TableNotFound
+           @client.create_table(@db, @table, format: @format)
+         end
+
+         @client.import(@db, @table, @files, @params)
+       end
+     end
+   end
+ end
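
For reference, a minimal sketch of how this helper might be driven. The `open_client` call, the literal database/table/object names, and the empty params hash are illustrative assumptions, not part of this diff; the argument order follows the constructor above.

  # Hypothetical caller: ensure the database and table exist, then load the objects.
  import = Dag::CLI::Import.new(
    open_client,              # a configured Dag::Client (assumed helper)
    'mydb',                   # created via create_database if missing
    'mytable',                # created with the given format if missing
    ['dag://mybucket/data/'], # files/objects to import
    'csv',                    # table format
    {}                        # extra parameters passed through to Client#import
  )
  import.execute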
@@ -0,0 +1,14 @@
+ module Dag
+   module CLI
+     class Query
+       def initialize(client, params)
+         @client = client
+         @params = params
+       end
+
+       def execute
+         @client.query(@params)
+       end
+     end
+   end
+ end
@@ -0,0 +1,18 @@
+ module Dag
+   module CLI
+     class SubCommand < Base
+       include Dag::CLI::Utils::TimeFormat
+
+       def self.banner(task, namespace = false, subcommand = true)
+         super
+       end
+
+       desc 'help [COMMAND]', 'Describe available commands or one specific command'
+       def help(command = nil, subcommand = false)
+         super
+         cmd = self.class.name.split('::').last.downcase
+         shell.say "Type 'dag #{cmd} help [COMMAND]' for more information on a specific command"
+       end
+     end
+   end
+ end
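
SubCommand tweaks Thor's banner and help output so nested commands print as 'dag <subcommand> ...'. A hedged sketch of how such a class is typically mounted on the top-level CLI with Thor's register; the Dag::CLI::Command class name is inferred from the CLI::Command.start calls later in this diff, and the usage strings are illustrative only.

  # Hypothetical wiring, assuming Dag::CLI::Command is the top-level Thor class.
  module Dag
    module CLI
      class Command < Thor
        register Bucket,  'bucket',  'bucket [COMMAND]',  'Operate buckets and objects'
        register Cluster, 'cluster', 'cluster [COMMAND]', 'Operate clusters'
        # 'dag bucket help list' then routes through SubCommand#help above.
      end
    end
  end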
@@ -0,0 +1,90 @@
+ # coding: utf-8
+
+ require 'pathname'
+
+ module Dag
+   module CLI
+     class Bucket < SubCommand
+       desc 'list ([BUCKET])', 'show bucket/object list'
+       option :prefix, type: :string, aliases: '-p', desc: "prefix filter", default: nil
+       option :delimiter, type: :string, aliases: '-d', desc: "object delimiter", default: nil
+       def list(bucket = nil)
+         fields = [:name]
+
+         if bucket
+           rows = handle_api_failure do
+             bucket = client.buckets[bucket]
+             bucket.objects.where(prefix: options[:prefix], delimiter: options[:delimiter])
+           end
+         else
+           rows = handle_api_failure do
+             client.buckets
+           end
+         end
+
+         handle_api_failure do
+           rows.each do |row|
+             puts row.name
+           end
+         end
+       end
+
+       desc 'read [BUCKET] [OBJECT]', 'read object'
+       def read(bucket, object)
+         rows = handle_api_failure do
+           bucket = client.buckets[bucket]
+           bucket.objects[object].read
+         end
+         print rows
+       end
+
+       desc 'create [BUCKET]', 'create bucket'
+       def create(bucket)
+         rows = handle_api_failure do
+           client.create_bucket(bucket)
+         end
+         say_status("Create bucket", rows.name)
+       end
+
+       desc 'delete [BUCKET] ([OBJECT])', 'delete bucket or object'
+       def delete(bucket, *objects)
+         handle_api_failure do
+           if objects.empty?
+             # for bucket
+             client.delete_bucket(bucket)
+             say_status("Delete bucket", bucket)
+           else
+             # for objects
+             th = []
+             objects.each do |object|
+               th << Thread.new do
+                 client.delete_object(bucket, object)
+                 say_status("Delete object", File.join(bucket, object))
+               end
+             end
+             th.map(&:join)
+           end
+         end
+       end
+
+       desc 'write [BUCKET] [OBJECT]', 'write object / string or file path'
+       option :data, type: :string, aliases: '-d', desc: "string or file path", required: true
+       option :multipart, type: :boolean, aliases: '-m', desc: "multipart upload"
+       option :multipart_jobs, type: :numeric, aliases: '-j', desc: "multipart upload jobs"
+       option :multipart_splitsz, type: :numeric, aliases: '-s', desc: "multipart upload split size"
+       def write(bucket, object)
+         params = {}
+         params.merge!(multipart: true) if options[:multipart]
+         params.merge!(jobs: options[:multipart_jobs]) if options[:multipart_jobs]
+         params.merge!(splitsz: options[:multipart_splitsz]) if options[:multipart_splitsz]
+
+         data = File.exist?(options[:data]) ? Pathname.new(options[:data]) : options[:data]
+         handle_api_failure do
+           obj = client.buckets[bucket].objects[object]
+           obj.write(data, params)
+           say_status("Write object", File.join(bucket, object))
+         end
+       end
+     end
+   end
+ end
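
The write task accepts either an inline string or a local file path via -d; an existing file is wrapped in a Pathname so the client can upload its contents, and the multipart flags are merged into the params hash. A hedged usage sketch using the programmatic CLI::Command.start entry point seen later in this diff; bucket and object names are placeholders.

  # Hypothetical invocations, equivalent to `dag bucket write ...` on the command line.
  Dag::CLI::Command.start(%w(bucket write mybucket greeting.txt -d hello))
  Dag::CLI::Command.start(%w(bucket write mybucket logs/app.log -d ./app.log -m -j 4))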
@@ -0,0 +1,187 @@
+ # coding: utf-8
+ module Dag
+   module CLI
+     class Cluster < SubCommand
+       include Dag::CLI::Utils::NumberToHuman
+
+       class NotClusterFound < StandardError; end
+
+       desc 'attach ([CLUSTER])', 'Attach cluster'
+       def attach(cluster = nil)
+         if cluster
+           attached_cluster = handle_api_failure do
+             open_client{ cluster }.cluster
+           end
+
+           local_store { |db| db[:cluster] = cluster }
+           say_status "Attach cluster", attached_cluster.name
+         else
+           cluster = local_store { |db| db[:cluster] }
+           begin
+             raise NotClusterFound if cluster.nil?
+             say_status "Attached cluster", cluster
+           rescue NotClusterFound
+             abort "Please attach to the cluster\nType 'dag cluster attach [CLUSTER]'"
+           end
+         end
+       end
+
+       desc 'detach', 'Detach cluster'
+       def detach
+         attached_cluster = ''
+         local_store do |db|
+           attached_cluster = db[:cluster]
+           raise NotClusterFound if attached_cluster.nil?
+           db[:cluster] = nil
+         end
+         say_status "Detach cluster", attached_cluster
+       rescue NotClusterFound
+         abort "Please attach to the cluster\nType 'dag cluster attach [CLUSTER]'"
+       end
+
+       desc 'list', 'show cluster list'
+       def list
+         headers = %w(# name status type instances)
+         rows = []
+         attach_cluster = local_store { |db| db[:cluster] }
+
+         handle_api_failure do
+           client.clusters.each do |cluster|
+             total_container = cluster.instances.lazy.map{|c| c["quantity"] }.inject(:+)
+             cluster_row = [cluster.name, cluster.status, cluster.type, total_container || 0]
+             rows << if attach_cluster == cluster.name
+                       cluster_row.unshift('*')
+                     else
+                       cluster_row.unshift(nil)
+                     end
+           end
+         end
+
+         terminal_table(rows, headers: headers, max_width: Float::INFINITY)
+       end
+
+       desc 'info', 'Cluster instance info id/dfs_used/non_dfs_used/capacity/grade'
+       def info
+         headers = %w(instance_id grade dfs_used non_dfs_used capacity)
+         rows = []
+
+         cluster = handle_api_failure do
+           open_client.cluster
+         end
+         statistics = cluster.statistics
+
+         unless statistics
+           return say_status(
+             "InvalidClusterState",
+             "Cluster status is invalid: #{cluster.status}",
+             :red
+           )
+         end
+
+         statistics.instances.each do |instance|
+           rows << [
+             instance.instance_id,
+             instance.grade,
+             number_to_human(instance.disk.dfs_used),
+             number_to_human(instance.disk.non_dfs_used),
+             number_to_human(instance.disk.capacity)
+           ]
+         end
+
+         puts "Cluster: #{cluster.name}"
+         terminal_table(rows, headers: headers)
+       end
+
+       desc 'restart', 'restart cluster'
+       option :force, type: :boolean, aliases: '-f', default: false, desc: 'cluster force restart'
+       option :type, type: :string, aliases: '-t', default: nil, desc: 'change cluster type'
+       option :wait, type: :boolean, aliases: '-w', default: false, desc: 'wait until normal cluster status'
+       option :assumeyes, type: :boolean, aliases: '-y', desc: 'assume that the answer to any question which would be asked is yes'
+       def restart
+         unless options[:assumeyes]
+           loop do
+             ans = ask 'Is this ok [y/N]:'
+             case ans
+             when 'y'
+               break
+             when 'N'
+               exit
+             end
+           end
+         end
+
+         headers = %w(name type force)
+         rows = []
+
+         cluster = handle_api_failure do
+           open_client.cluster
+         end
+
+         options[:type] ||= cluster.type
+         params = { force: options[:force], type: options[:type] }
+
+         handle_api_failure do
+           cluster.restart(params)
+         end
+
+         rows << [cluster.name, params[:type], params[:force]]
+
+         terminal_table(rows, headers: headers)
+
+         if options[:wait]
+           puts
+           puts 'wait cluster ready...'
+           wait_ready
+         end
+       end
+
+       desc 'wait_ready', 'wait cluster ready'
+       def wait_ready
+         pretime = Time.now
+         prestate = ""
+         state = []
+         state_num = 0
+         while true do
+           cluster = handle_api_failure do
+             open_client.cluster
+           end
+
+           st = cluster.status
+           now = Proc.new { "#{st}(#{(Time.now-pretime).round}s)" }
+           fiber = Fiber.new {|state| Fiber.yield state }
+
+           if prestate != st
+             prestate = st
+             pretime = Time.now
+             state << now.call
+             state_num += 1
+           end
+
+           state[state_num-1] = now.call
+           print "#{fiber.resume(state*' ')}\r"
+           $stdout.flush
+
+           if %w(norm failed ptfailed error).include? st
+             print "\n"
+             break
+           end
+
+           sleep 1
+         end
+       end
+
+       desc 'log [OUTPUT_PATH]', 'export cluster log'
+       option :compress, type: :boolean, aliases: '-c', default: false
+       def log(output_log_path)
+         params = {
+           compress: options[:compress],
+           output_log_path: output_log_path
+         }
+         handle_api_failure do
+           cluster = open_client.cluster
+           cluster.export_log(params)
+         end
+       end
+     end
+   end
+ end
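
restart --wait defers to wait_ready, which polls the cluster once a second and rewrites a single status line such as "restarting(42s) norm(3s)" until the status settles on norm, failed, ptfailed, or error. A hedged sketch of a non-interactive restart; the cluster name is a placeholder, attachment state comes from a prior `dag cluster attach`, and the entry point is the same assumed Dag::CLI::Command class.

  # Hypothetical: force-restart the attached cluster without prompting and block until ready.
  Dag::CLI::Command.start(%w(cluster attach mycluster))
  Dag::CLI::Command.start(%w(cluster restart -f -y -w))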
@@ -0,0 +1,38 @@
+ # coding: utf-8
+ module Dag
+   module CLI
+     class Db < SubCommand
+
+       desc 'list', 'Show database list'
+       def list
+         fields = %w(name)
+         rows = handle_api_failure { open_client.databases }
+         terminal_table(rows, fields: fields)
+       end
+
+       desc 'create [DATABASE]', 'Create database'
+       def create(db_name)
+         database = handle_api_failure do
+           open_client.create_database(db_name)
+         end
+
+         say_status "Create Database", database.name
+       end
+
+       desc 'delete [DATABASE]', 'Delete database'
+       def delete(db_name)
+         db = ''
+         handle_api_failure do
+           db = open_client.database(db_name)
+           db.delete if db
+         end
+
+         if db
+           say_status "Delete Database", db_name
+         else
+           say_status "Unknown Database", db_name, :red
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,341 @@
+ # coding: utf-8
+
+ require 'ruby-progressbar'
+ require 'csv'
+ require 'anbt-sql-formatter/formatter'
+
+ module Dag
+   module CLI
+     class Job < SubCommand
+
+       desc 'list', 'Jobs list'
+       option :order, type: :string, aliases: '-o', desc: "list order [asc|desc]"
+       option :filter, type: :hash, aliases: '-f', desc: "list filter key [status|type|cluster_name|label|cluster_rebooted]"
+       option :limit, type: :numeric, aliases: '-n', desc: "limit the number of jobs listed"
+       def list
+         headers = %w(id status start_at type cluster query)
+         rows = []
+
+         if options[:filter] && (cluster_rebooted = options[:filter]['cluster_rebooted'])
+           options[:filter]['cluster_rebooted'] = case cluster_rebooted
+                                                  when "true"
+                                                    true
+                                                  when "false"
+                                                    false
+                                                  end
+         end
+
+         handle_api_failure do
+           jobs = client.jobs.where(cluster_name: open_client.cluster_name)
+
+           if options[:filter]
+             jobs = jobs.where(options[:filter].symbolize_keys)
+           end
+
+           if options[:limit]
+             jobs = jobs.limit(options[:limit])
+           end
+
+           iterator = :each
+           order = if options[:order]
+                     options[:order]
+                   else
+                     if options[:limit]
+                       iterator = :reverse_each
+                       'desc'
+                     else
+                       'asc'
+                     end
+                   end
+
+           jobs.order(order).send(iterator) do |job|
+             row = [ job.id ]
+             row += [ job.status ]
+             row += [ time_format(job.start_at) ]
+             row += [ job.type ]
+             row += [ job.cluster ]
+             row += [ job.query ]
+             rows << row
+           end
+         end
+
+         terminal_table(rows, headers: headers)
+       end
+
+       desc 'info [JOB_ID]', 'Job info'
+       def info(job_id)
+         headers = ["item", "info"]
+         rows = []
+
+         job = handle_api_failure do
+           client.job(job_id)
+         end
+
+         rows << ["id", job.id]
+         rows << ["status", job.status]
+         rows << ["start_at", time_format(job.start_at)]
+         rows << ["type", job.type]
+         rows << ["dsl", job.dsl]
+         rows << ["cluster", job.cluster]
+         rows << ["cluster_rebooted", job.cluster_rebooted]
+         rows << ["label", job.label]
+         rows << ["stage", job.stage]
+         rows << ["progress", job.progress]
+         rows << ["access_key_id", job.access_key_id]
+
+         case job.dsl
+         when "hive"
+           rows << ["output_format", job.output_format]
+           rows << ["output_resource_path", job.output_resource_path]
+           query = sql_format(job.query).split("\n")
+           query.each_with_index do |line, i|
+             rows << ['', line]
+             rows.last[0] = 'query' if i == 0
+           end
+         when "mapreduce"
+           rows << ["job_id", job.job_id]
+           rows << ["schema", job.schema]
+           rows << ["input_object_keys", job.input_object_keys]
+           rows << ["input_format", job.input_format]
+           rows << ["output_database", job.output_database]
+           rows << ["output_table", job.output_table]
+         end
+
+         # vertical, unicode, tab, markdown, simple
+         terminal_table(rows, headers: headers, max_width: Float::INFINITY)
+       end
+
+       desc 'reuse [JOB_ID]', 'reuse job'
+       option :output, type: :string, aliases: '-o', desc: 'new output storage path'
+       option :format, type: :string, aliases: '-f', desc: 'new output format [csv/tsv]'
+       option :label, type: :string, aliases: '-l', desc: 'new job label'
+       option :query, type: :string, aliases: '-q', desc: 'new query'
+       option :wait, type: :boolean, aliases: '-w', desc: 'wait job finish'
+       def reuse(job_id)
+         reuse_job = handle_api_failure do
+           client.job(job_id)
+         end
+
+         if reuse_job.type == "select"
+           query_string = options[:query]
+           if query_string
+             if File.exist? query_string
+               query_string = File.read(query_string)
+               puts "Query: #{query_string}"
+             end
+           else
+             query_string = reuse_job.query
+           end
+
+           output_format = options[:format]
+           unless output_format
+             output_format = reuse_job.output_format
+           end
+
+           output_resource_path = options[:output]
+           unless output_resource_path
+             output_resource_path = reuse_job.output_resource_path
+           end
+
+           label = options[:label]
+           unless label
+             label = reuse_job.label
+           end
+
+           params = {
+             query: query_string,
+             output_format: output_format,
+             output_resource_path: output_resource_path,
+             label: label
+           }
+
+           query = Query.new(open_client, params)
+           job = handle_api_failure { query.execute }
+
+           say_status "accepted job", "job_id: #{job.id}"
+           if options[:wait]
+             CLI::Command.start(["job", "log", job.id.to_s, "-t"])
+             CLI::Command.start(["job", "result", job.id.to_s])
+           end
+         else
+           say_status "invalid job type", "job_type: #{reuse_job.type}"
+           exit 1
+         end
+       end
+
+       desc 'log [JOB_ID]', 'show job log'
+       option :tail, type: :boolean, aliases: '-t', desc: 'tail -f'
+       def log(job_id)
+         if options[:tail]
+           mark = nil
+           while true
+             jst, log = handle_api_failure {
+               j = client.job(job_id)
+               [j.running?, j.log]
+             }
+             lines = log.split("\n")
+             if mark
+               n = lines.rindex(mark)
+               lines = lines[(n+1)..-1] if n
+             end
+             unless lines.empty?
+               mark = lines[-1]
+               puts lines.join("\n")
+             end
+             break unless jst
+             sleep 1
+           end
+         else
+           log = handle_api_failure do
+             client.job(job_id).log
+           end
+
+           puts log
+         end
+       end
+
+       desc 'result [JOB_ID]', 'Create download url'
+       option :format, type: :string, aliases: '-f', desc: 'output format [csv|tsv|table|url|time]', default: "table"
+       def result(job_id)
+         if options[:format] == "url"
+           download_url = handle_api_failure do
+             client.job(job_id).download_urls
+           end
+           puts download_url
+           return
+         end
+
+         if job_id.include? "/"
+           target = job_id
+         else
+           job = handle_api_failure do
+             client.job(job_id)
+           end
+           target = job.output_resource_path
+         end
+
+         unless target
+           if job.try(:type) == 'split'
+             say_status 'ERROR', 'Invalid job type', :red
+           else
+             say_status 'ERROR', 'Invalid job_id or dag://bucket/prefix/', :red
+           end
+           exit 1
+         end
+
+         if job && !job.finished?
+           say_status 'ERROR', "Job is not finished (status=#{job.status})", :red
+           return
+         end
+
+         uri = URI.parse(target)
+         bucket = uri.host
+         prefix = uri.path[1..-1]
+         bucket = client.buckets[bucket]
+         rsm = handle_api_failure do
+           bucket.objects[File.join(prefix, ".resultsetmetadata")].read
+         end
+
+         if rsm.headers['Last-Modified'].present?
+           finishedat = Time.parse(rsm.headers['Last-Modified'].first)
+         end
+
+         if %w(table time).include? options[:format]
+           h = []
+           h << "job start at #{time_format(job.start_at)}" if job
+           h << "finished #{time_format(finishedat)}" if finishedat
+           h << "#{(finishedat-job.start_at).to_i} seconds." if job && finishedat
+           puts h.join(", ")
+           return if options[:format] == "time"
+         end
+
+         field_separator = ","
+         field_separator = "\t" if job.output_format == "tsv"
+
+         hdr = Oj.load(rsm)
+         hdrs = []
+         hdr["columns"].each{|e| hdrs[e["position"].to_i-1] = e["name"] }
+         rows = handle_api_failure do
+           bucket.objects.where(prefix: prefix)
+                 .select{|s| s.name.include? "/0" }
+                 .map{|e| CSV.parse(bucket.objects[e.name].read, col_sep: field_separator) }
+         end
+
+         rows.each{|obj|
+           case options[:format]
+           when 'csv'
+             puts hdrs.join(",")
+             puts obj.map{|m| m.join(",") }.join("\n")
+           when 'tsv'
+             puts hdrs.join("\t")
+             puts obj.map{|m| m.join("\t") }.join("\n")
+           else
+             terminal_table(obj, headers: hdrs)
+           end
+         }
+       end
+
+       desc 'kill! [JOB_ID]', 'Stop job'
+       def kill!(job_id)
+         handle_api_failure do
+           client.job(job_id).kill
+         end
+
+         say_status("Cancelling job", job_id)
+       end
+
+       desc 'progress [JOB_ID]', 'Show Progress Bar'
+       def progress(job_id = nil)
+         if job_id
+           jobs = [job_id]
+         else
+           jobs = handle_api_failure{
+             client.jobs.select{|j| j.running? }.map{|j| j.id }
+           }
+         end
+         return unless jobs.length == 1
+         progress_bar = ProgressBar.create(total: 100, format: '%t %a <%B> %p%% %e', autofinish: false)
+         stage = nil
+         while true
+           job = handle_api_failure{ client.job(jobs[0]) }
+           if ! job.running?
+             progress_bar.finish
+             puts "#{job.status}"
+             break
+           end
+           if job.stage && stage != job.stage
+             progress_bar.finish if stage
+             progress_bar = ProgressBar.create(title: "Stage-#{job.stage}/Job-#{jobs[0]}", total: 100, format: '%t %a <%B> %p%% %e', autofinish: false)
+             stage = job.stage
+           end
+           progress_bar.progress = job.progress.to_f if job.progress && stage == job.stage
+           sleep 1
+         end
+       end
+
+       desc 'wait [JOB_ID]', 'wait job'
+       def wait(job_id = nil)
+         while true
+           if job_id
+             job = handle_api_failure{ client.job(job_id) }
+             break unless job.running?
+           else
+             jobs = handle_api_failure{
+               client.jobs.select{|j| j.running? }
+             }
+             break if jobs.empty?
+           end
+           sleep 1
+         end
+       end
+
+       private
+
+       def sql_format(query)
+         tmp = query.dup
+         sql_formatter = AnbtSql::Formatter.new(AnbtSql::Rule.new)
+         sql_formatter.format(tmp).to_s
+       end
+     end
+   end
+ end
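
The result task resolves a job id (or a dag://bucket/prefix/ path) to its output location, reads .resultsetmetadata for the column layout, and prints the result objects as a table, CSV, or TSV. A hedged end-to-end sketch; 1234 is a placeholder job id and the entry point is the assumed Dag::CLI::Command class.

  # Hypothetical: follow a running job's log, then print its result set as TSV.
  Dag::CLI::Command.start(%w(job log 1234 -t))
  Dag::CLI::Command.start(%w(job result 1234 -f tsv))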