iij-dag 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
+ module Dag
+   module CLI
+     class Import
+       def initialize(client, db, table, files, format, params)
+         @client = client
+         @db = db
+         @table = table
+         @files = files
+         @params = params
+         @format = format
+       end
+
+       def execute
+         begin
+           @client.database(@db)
+         rescue Dag::Client::DatabaseNotFound
+           @client.create_database(@db)
+         end
+
+         begin
+           @client.database(@db).table(@table)
+         rescue Dag::Client::TableNotFound
+           @client.create_table(@db, @table, format: @format)
+         end
+
+         @client.import(@db, @table, @files, @params)
+       end
+     end
+   end
+ end
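
The Import command above is self-contained: `execute` provisions the database and table on demand by rescuing `DatabaseNotFound`/`TableNotFound`, then delegates to `client.import`. A minimal usage sketch, assuming an already configured `Dag::Client` instance and hypothetical database, table, and file names:

    client = Dag::Client.new           # assumed constructor; configure credentials as your setup requires
    importer = Dag::CLI::Import.new(client, 'sample_db', 'access_log', ['access_log.csv'], 'csv', {})
    importer.execute                   # creates sample_db and access_log if missing, then imports the file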
@@ -0,0 +1,14 @@
+ module Dag
+   module CLI
+     class Query
+       def initialize(client, params)
+         @client = client
+         @params = params
+       end
+
+       def execute
+         @client.query(@params)
+       end
+     end
+   end
+ end
@@ -0,0 +1,18 @@
+ module Dag
+   module CLI
+     class SubCommand < Base
+       include Dag::CLI::Utils::TimeFormat
+
+       def self.banner(task, namespace = false, subcommand = true)
+         super
+       end
+
+       desc 'help [COMMAND]', 'Describe available commands or one specific command'
+       def help(command = nil, subcommand = false)
+         super
+         cmd = self.class.name.split('::').last.downcase
+         shell.say "Type 'dag #{cmd} help [COMMAND]' for more information on a specific command"
+       end
+     end
+   end
+ end
@@ -0,0 +1,90 @@
+ # coding: utf-8
+
+ require 'pathname'
+
+ module Dag
+   module CLI
+     class Bucket < SubCommand
+       desc 'list ([BUCKET])', 'show bucket/object list'
+       option :prefix, type: :string, aliases: '-p', desc: "prefix filter", default: nil
+       option :delimiter, type: :string, aliases: '-d', desc: "object delimiter", default: nil
+       def list(bucket = nil)
+         fields = [:name]
+
+         unless bucket
+           rows = handle_api_failure do
+             client.buckets
+           end
+         else
+           rows = handle_api_failure do
+             bucket = client.buckets[bucket]
+             bucket.objects.where(prefix: options[:prefix], delimiter: options[:delimiter])
+           end
+         end
+
+         handle_api_failure do
+           rows.each do |row|
+             puts row.name
+           end
+         end
+       end
+
+       desc 'read [BUCKET] [OBJECT]', 'read object'
+       def read(bucket, object)
+         rows = handle_api_failure do
+           bucket = client.buckets[bucket]
+           bucket.objects[object].read
+         end
+         print rows
+       end
+
+       desc 'create [BUCKET]', 'create bucket'
+       def create(bucket)
+         rows = handle_api_failure do
+           client.create_bucket(bucket)
+         end
+         say_status("Create bucket", rows.name)
+       end
+
+       desc 'delete [BUCKET] ([OBJECT])', 'delete bucket or object'
+       def delete(bucket, *objects)
+         handle_api_failure do
+           if objects.empty?
+             # for bucket
+             client.delete_bucket(bucket)
+             say_status("Delete bucket", bucket)
+           else
+             # for objects
+             th = []
+             objects.each do |object|
+               th << Thread.new do
+                 client.delete_object(bucket, object)
+                 say_status("Delete object", File.join(bucket, object))
+               end
+             end
+             th.map(&:join)
+           end
+         end
+       end
+
+       desc 'write [BUCKET] [OBJECT]', 'write object / string or file path'
+       option :data, type: :string, aliases: '-d', desc: "string or file path", required: true
+       option :multipart, type: :boolean, aliases: '-m', desc: "multipart upload"
+       option :multipart_jobs, type: :numeric, aliases: '-j', desc: "multipart upload jobs"
+       option :multipart_splitsz, type: :numeric, aliases: '-s', desc: "multipart upload split size"
+       def write(bucket, object)
+         params = {}
+         params.merge!(multipart: true) if options[:multipart]
+         params.merge!(jobs: options[:multipart_jobs]) if options[:multipart_jobs]
+         params.merge!(splitsz: options[:multipart_splitsz]) if options[:multipart_splitsz]
+
+         data = File.exist?(options[:data]) ? Pathname.new(options[:data]) : options[:data]
+         handle_api_failure do
+           obj = client.buckets[bucket].objects[object]
+           obj.write(data, params)
+           say_status("Write object", File.join(bucket, object))
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,187 @@
+ # coding: utf-8
+ module Dag
+   module CLI
+     class Cluster < SubCommand
+       include Dag::CLI::Utils::NumberToHuman
+
+       class NotClusterFound < StandardError; end
+
+       desc 'attach ([CLUSTER])', 'Attach cluster'
+       def attach(cluster = nil)
+         if cluster
+           attached_cluster = handle_api_failure do
+             open_client{ cluster }.cluster
+           end
+
+           local_store { |db| db[:cluster] = cluster }
+           say_status "Attach cluster", attached_cluster.name
+         else
+           cluster = local_store { |db| db[:cluster] }
+           begin
+             raise NotClusterFound if cluster.nil?
+             say_status "Attached cluster", cluster
+           rescue NotClusterFound
+             abort "Please attach to the cluster\nType 'dag cluster attach [CLUSTER]'"
+           end
+         end
+       end
+
+       desc 'detach', 'Detach cluster'
+       def detach
+         attached_cluster = ''
+         local_store do |db|
+           attached_cluster = db[:cluster]
+           raise NotClusterFound if attached_cluster.nil?
+           db[:cluster] = nil
+         end
+         say_status "Detach cluster", attached_cluster
+       rescue NotClusterFound
+         abort "Please attach to the cluster\nType 'dag attach [CLUSTER]'"
+       end
+
+       desc 'list', 'show cluster list'
+       def list
+         headers = %w(# name status type instances)
+         rows = []
+         attach_cluster = local_store { |db| db[:cluster] }
+
+         handle_api_failure do
+           client.clusters.each do |cluster|
+             total_container = cluster.instances.lazy.map{|c| c["quantity"] }.inject(:+)
+             cluster_row = [cluster.name, cluster.status, cluster.type, total_container || 0]
+             rows << if attach_cluster == cluster.name
+               cluster_row.unshift('*')
+             else
+               cluster_row.unshift(nil)
+             end
+           end
+         end
+
+         terminal_table(rows, headers: headers, max_width: Float::INFINITY)
+       end
+
+       desc 'info', 'Cluster instance info id/dfs_used/non_dfs_used/capacity/grade'
+       def info
+         headers = %w(instance_id grade dfs_used non_dfs_used capacity)
+         rows = []
+
+         cluster = handle_api_failure do
+           open_client.cluster
+         end
+         statistics = cluster.statistics
+
+         unless statistics
+           return say_status(
+             "InvalidClusterState",
+             "Cluster status is invalid: #{cluster.status}",
+             :red
+           )
+         end
+
+         statistics.instances.each do |instance|
+           rows << [
+             instance.instance_id,
+             instance.grade,
+             number_to_human(instance.disk.dfs_used),
+             number_to_human(instance.disk.non_dfs_used),
+             number_to_human(instance.disk.capacity)
+           ]
+         end
+
+         puts "Cluster: #{cluster.name}"
+         terminal_table(rows, headers: headers)
+       end
+
+       desc 'restart', 'restart cluster'
+       option :force, type: :boolean, aliases: '-f', default: false, desc: 'cluster force restart'
+       option :type, type: :string, aliases: '-t', default: nil, desc: 'change cluster type'
+       option :wait, type: :boolean, aliases: '-w', default: false, desc: 'wait until normal cluster status'
+       option :assumeyes, type: :boolean, aliases: '-y', desc: 'assume that the answer to any question which would be asked is yes'
+       def restart
+         unless options[:assumeyes]
+           loop do
+             ans = ask 'Is this ok [y/N]:'
+             case ans
+             when 'y'
+               break
+             when 'N'
+               exit
+             end
+           end
+         end
+
+         headers = %w(name type force)
+         rows = []
+
+         cluster = handle_api_failure do
+           open_client.cluster
+         end
+
+         options[:type] ||= cluster.type
+         params = { force: options[:force], type: options[:type] }
+
+         handle_api_failure do
+           cluster.restart(params)
+         end
+
+         rows << [cluster.name, params[:type], params[:force]]
+
+         terminal_table(rows, headers: headers)
+
+         if options[:wait]
+           puts
+           puts 'wait cluster ready...'
+           wait_ready
+         end
+       end
+
+       desc 'wait_ready', 'wait cluster ready'
+       def wait_ready
+         pretime = Time.now
+         prestate = ""
+         state = []
+         state_num = 0
+         while true do
+           cluster = handle_api_failure do
+             open_client.cluster
+           end
+
+           st = cluster.status
+           now = Proc.new { "#{st}(#{(Time.now-pretime).round}s)" }
+           fiber = Fiber.new {|state| Fiber.yield state }
+
+           if prestate != st
+             prestate = st
+             pretime = Time.now
+             state << now.call
+             state_num += 1
+           end
+
+           state[state_num-1] = now.call
+           print "#{fiber.resume(state*' ')}\r"
+           $stdout.flush
+
+           if %w(norm failed ptfailed error).include? st
+             print "\n"
+             break
+           end
+
+           sleep 1
+         end
+       end
+
+       desc 'log [OUTPUT_PATH]', 'export cluster log'
+       option :compress, type: :boolean, aliases: '-c', default: false
+       def log(output_log_path)
+         params = {
+           compress: options[:compress],
+           output_log_path: output_log_path
+         }
+         handle_api_failure do
+           cluster = open_client.cluster
+           cluster.export_log(params)
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,38 @@
+ # coding: utf-8
+ module Dag
+   module CLI
+     class Db < SubCommand
+
+       desc 'list', 'Show database list'
+       def list
+         fields = %w(name)
+         rows = handle_api_failure { open_client.databases }
+         terminal_table(rows, fields: fields)
+       end
+
+       desc 'create [DATABASE]', 'Create database'
+       def create(db_name)
+         database = handle_api_failure do
+           open_client.create_database(db_name)
+         end
+
+         say_status "Create Database", database.name
+       end
+
+       desc 'delete [DATABASE]', 'Delete database'
+       def delete(db_name)
+         db = ''
+         handle_api_failure do
+           db = open_client.database(db_name)
+           db.delete if db
+         end
+
+         if db
+           say_status "Delete Database", db_name
+         else
+           say_status "Unknown Database", db_name, :red
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,341 @@
+ # coding: utf-8
+
+ require 'ruby-progressbar'
+ require 'csv'
+ require 'anbt-sql-formatter/formatter'
+
+ module Dag
+   module CLI
+     class Job < SubCommand
+
+       desc 'list', 'Jobs list'
+       option :order, type: :string, aliases: '-o', desc: "list order [asc|desc]"
+       option :filter, type: :hash, aliases: '-f', desc: "list filter key [status|type|cluster_name|label|cluster_rebooted]"
+       option :limit, type: :numeric, aliases: '-n', desc: "limit the number of jobs listed"
+       def list
+         headers = %w(id status start_at type cluster query)
+         rows = []
+
+         if options[:filter] && cluster_rebooted = options[:filter]['cluster_rebooted']
+           options[:filter]['cluster_rebooted'] = case cluster_rebooted
+             when "true"
+               true
+             when "false"
+               false
+             end
+         end
+
+         handle_api_failure do
+           jobs = client.jobs.where(cluster_name: open_client.cluster_name)
+
+           if options[:filter]
+             jobs = jobs.where(options[:filter].symbolize_keys)
+           end
+
+           if options[:limit]
+             jobs = jobs.limit(options[:limit])
+           end
+
+           iterator = :each
+           order = if options[:order]
+             options[:order]
+           else
+             if options[:limit]
+               iterator = :reverse_each
+               'desc'
+             else
+               'asc'
+             end
+           end
+
+           jobs.order(order).send(iterator) do |job|
+             row = [ job.id ]
+             row += [ job.status ]
+             row += [ time_format(job.start_at) ]
+             row += [ job.type ]
+             row += [ job.cluster ]
+             row += [ job.query ]
+             rows << row
+           end
+         end
+
+         terminal_table(rows, headers: headers)
+       end
+
+       desc 'info [JOB_ID]', 'Job info'
+       def info(job_id)
+         headers = ["item", "info"]
+         rows = []
+
+         job = handle_api_failure do
+           client.job(job_id)
+         end
+
+         rows << ["id", job.id]
+         rows << ["status", job.status]
+         rows << ["start_at", time_format(job.start_at)]
+         rows << ["type", job.type]
+         rows << ["dsl", job.dsl]
+         rows << ["cluster", job.cluster]
+         rows << ["cluster_rebooted", job.cluster_rebooted]
+         rows << ["label", job.label]
+         rows << ["stage", job.stage]
+         rows << ["progress", job.progress]
+         rows << ["access_key_id", job.access_key_id]
+
+         case job.dsl
+         when "hive"
+           rows << ["output_format", job.output_format]
+           rows << ["output_resource_path", job.output_resource_path]
+           query = sql_format(job.query).split("\n")
+           query.each_with_index do |line, i|
+             rows << ['', line]
+             rows.last[0] = 'query' if i == 0
+           end
+         when "mapreduce"
+           rows << ["job_id", job.job_id]
+           rows << ["schema", job.schema]
+           rows << ["input_object_keys", job.input_object_keys]
+           rows << ["input_format", job.input_format]
+           rows << ["output_database", job.output_database]
+           rows << ["output_table", job.output_table]
+         end
+
+         # vertical, unicode, tab, markdown, simple
+         terminal_table(rows, headers: headers, max_width: Float::INFINITY)
+       end
+
+       desc 'reuse [JOB_ID]', 'reuse job'
+       option :output, type: :string, aliases: '-o', desc: 'new output storage path'
+       option :format, type: :string, aliases: '-f', desc: 'new output format [csv/tsv]'
+       option :label, type: :string, aliases: '-l', desc: 'new job label'
+       option :query, type: :string, aliases: '-q', desc: 'new query'
+       option :wait, type: :boolean, aliases: '-w', desc: 'wait job finish'
+       def reuse(job_id)
+         reuse_job = handle_api_failure do
+           client.job(job_id)
+         end
+
+         if reuse_job.type == "select"
+           query_string = options[:query]
+           if query_string
+             if File.exist? query_string
+               query_string = open(query_string).read
+               puts "Query: #{query_string}"
+             end
+           else
+             query_string = reuse_job.query
+           end
+
+           output_format = options[:format]
+           unless output_format
+             output_format = reuse_job.output_format
+           end
+
+           output_resource_path = options[:output]
+           unless output_resource_path
+             output_resource_path = reuse_job.output_resource_path
+           end
+
+           label = options[:label]
+           unless label
+             label = reuse_job.label
+           end
+
+           params = {
+             query: query_string,
+             output_format: output_format,
+             output_resource_path: output_resource_path,
+             label: label
+           }
+
+           query = Query.new(open_client, params)
+           job = handle_api_failure { query.execute }
+
+           say_status "accepted job", "job_id: #{job.id}"
+           if options[:wait]
+             CLI::Command.start(["job", "log", job.id.to_s, "-t"])
+             CLI::Command.start(["job", "result", job.id.to_s])
+           end
+         else
+           say_status "invalid job type", "job_type: #{reuse_job.type}"
+           exit 1
+         end
+       end
+
+       desc 'log [JOB_ID]', 'show job log'
+       option :tail, type: :boolean, aliases: '-t', desc: 'tail -f'
+       def log(job_id)
+         if options[:tail]
+           mark = nil
+           while true
+             jst, log = handle_api_failure {
+               j = client.job(job_id)
+               [j.running?, j.log]
+             }
+             lines = log.split("\n")
+             if mark
+               n = lines.rindex(mark)
+               lines = lines[(n+1)..-1] if n
+             end
+             unless lines.empty?
+               mark = lines[-1]
+               puts lines.join("\n")
+             end
+             break unless jst
+             sleep 1
+           end
+         else
+           log = handle_api_failure do
+             client.job(job_id).log
+           end
+
+           puts log
+         end
+       end
+
+       desc 'result [JOB_ID]', 'Create download url'
+       option :format, type: :string, aliases: '-f', desc: 'output format [csv|tsv|table|url|time]', default: "table"
+       def result(job_id)
+         if options[:format] == "url"
+           download_url = handle_api_failure do
+             client.job(job_id).download_urls
+           end
+           puts download_url
+           return
+         end
+
+         unless job_id.include? "/"
+           job = handle_api_failure do
+             client.job(job_id)
+           end
+           target = job.output_resource_path
+         else
+           target = job_id
+         end
+
+         unless target
+           if job.try(:type) == 'split'
+             say_status 'ERROR', 'Invalid job type', :red
+           else
+             say_status 'ERROR', 'Invalid job_id or dag://bucket/prefix/', :red
+           end
+           exit 1
+         end
+
+         unless job && job.finished?
+           say_status 'ERROR', "Job is not finished (status=#{job.status})", :red
+           return
+         end
+
+         uri = URI.parse(target)
+         bucket = uri.host
+         prefix = uri.path[1..-1]
+         bucket = client.buckets[bucket]
+         rsm = handle_api_failure do
+           bucket.objects[File.join(prefix, ".resultsetmetadata")].read
+         end
+
+         if rsm.headers['Last-Modified'].present?
+           finishedat = Time.parse(rsm.headers['Last-Modified'].first)
+         end
+
+         if %w(table time).include? options[:format]
+           h = []
+           h << "job start at #{time_format(job.start_at)}" if job
+           h << "finished #{time_format(finishedat)}" if finishedat
+           h << "#{(finishedat-job.start_at).to_i} seconds." if job && finishedat
+           puts h.join(", ")
+           return if options[:format] == "time"
+         end
+
+         field_separator = ","
+         field_separator = "\t" if job.output_format == "tsv"
+
+         hdr = Oj.load(rsm)
+         hdrs = []
+         hdr["columns"].each{|e| hdrs[e["position"].to_i-1] = e["name"] }
+         rows = handle_api_failure do
+           bucket.objects.where(prefix: prefix)
+                 .select{|s| s.name.include? "/0" }
+                 .map{|e| CSV.parse(bucket.objects[e.name].read, col_sep: field_separator) }
+         end
+
+         rows.each{|obj|
+           case options[:format]
+           when 'csv'
+             puts hdrs.join(",")
+             puts obj.map{|m| m.join(",") }.join("\n")
+           when 'tsv'
+             puts hdrs.join("\t")
+             puts obj.map{|m| m.join("\t") }.join("\n")
+           else
+             terminal_table(obj, headers: hdrs)
+           end
+         }
+       end
+
+       desc 'kill! [JOB_ID]', 'Stop job'
+       def kill!(job_id)
+         handle_api_failure do
+           client.job(job_id).kill
+         end
+
+         say_status("Cancelling job", job_id)
+       end
+
+       desc 'progress [JOB_ID]', 'Show Progress Bar'
+       def progress(job_id = nil)
+         if job_id
+           jobs = [job_id]
+         else
+           jobs = handle_api_failure{
+             client.jobs.select{|j| j.running? }.map{|j| j.id }
+           }
+         end
+         return unless jobs.length == 1
+         progress_bar = ProgressBar.create(total: 100, format: '%t %a <%B> %p%% %e', autofinish: false)
+         stage = nil
+         while true
+           job = handle_api_failure{ client.job(jobs[0]) }
+           if ! job.running?
+             progress_bar.finish
+             puts "#{job.status}"
+             break
+           end
+           if job.stage && stage != job.stage
+             progress_bar.finish if stage
+             progress_bar = ProgressBar.create(title: "Stage-#{job.stage}/Job-#{jobs[0]}", total: 100, format: '%t %a <%B> %p%% %e', autofinish: false)
+             stage = job.stage
+           end
+           progress_bar.progress = job.progress.to_f if job.progress && stage == job.stage
+           sleep 1
+         end
+       end
+
+       desc 'wait [JOB_ID]', 'wait job'
+       def wait(job_id = nil)
+         while true
+           if job_id
+             job = handle_api_failure{ client.job(job_id) }
+             break unless job.running?
+           else
+             jobs = handle_api_failure{
+               client.jobs.select{|j| j.running? }
+             }
+             break if jobs.empty?
+           end
+           sleep 1
+         end
+       end
+
+       private
+
+       def sql_format(query)
+         tmp = query.dup
+         sql_formatter = AnbtSql::Formatter.new(AnbtSql::Rule.new)
+         sql_formatter.format(tmp).to_s
+       end
+     end
+   end
+ end
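
Taken together, the Job commands above use `client.job(id).running?` as their single polling predicate: `wait`, `progress`, and `log --tail` all loop on it, and `reuse --wait` chains into `job log -t` followed by `job result`. A minimal sketch of that polling loop, using only the calls shown in this diff (the `client` and `job_id` values are assumed to come from the surrounding CLI setup):

    job = client.job(job_id)       # fetch current job state
    while job.running?             # same predicate used by wait/progress/log --tail
      sleep 1
      job = client.job(job_id)
    end
    puts job.status                # final status as reported by the API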