mobilize-hive 1.29 → 1.31

data/README.md CHANGED
@@ -142,6 +142,17 @@ Start
  * cluster and user are optional for all of the below.
  * cluster defaults to the first cluster listed;
  * user is treated the same way as in [mobilize-ssh][mobilize-ssh].
+ * params are also optional for all of the below. They replace tokens in the source HQL.
+ * params are passed as YAML or JSON, as in:
+ * `hive.run source:<source_path>, params:{'date': '2013-03-01', 'unit': 'widgets'}`
+ * this example replaces each key, prefixed with '@', in all source HQLs with its value.
+ * The '@' prefix keeps plain occurrences of "date" and "unit" in the HQL
+ from being replaced; put `@date` and `@unit` in your actual HQL
+ wherever you want those tokens substituted.
+ * in addition, the following params are substituted automatically:
+ * `$utc_date` - replaced with the YYYY-MM-DD date, UTC
+ * `$utc_time` - replaced with the HH:MM time, UTC
+ * any occurrence of these values in the HQL will be replaced at runtime (see the sketch after this hunk).
  * hive.run `hql:<hql> || source:<gsheet_path>, user:<user>, cluster:<cluster>`, which executes the
  script in the hql or source sheet and returns any output specified at the
  end. If the cmd or last query in source is a select statement, column headers will be
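For orientation, here is a minimal Ruby sketch of the substitution rules described above, mirroring the `Hive.run` change further down in this diff. `substitute_params` is a hypothetical standalone helper written for illustration, not part of the gem's API:

```ruby
# Hypothetical helper that mirrors the documented behavior:
# '$'-prefixed params (the automatic ones) are replaced literally,
# while user params only match their '@'-prefixed form in the HQL.
def substitute_params(hql, params = {})
  now = Time.now.utc
  defaults = {
    '$utc_date' => now.strftime('%Y-%m-%d'),
    '$utc_time' => now.strftime('%H:%M')
  }
  params.merge(defaults).each do |key, value|
    if key.start_with?('$')
      hql = hql.gsub(key, value)
    else
      hql = hql.gsub("@#{key}", value)
    end
  end
  hql
end

hql = "select * from sales where `date`='@date' and unit='@unit' -- generated $utc_date $utc_time"
puts substitute_params(hql, 'date' => '2013-03-01', 'unit' => 'widgets')
# e.g. => select * from sales where `date`='2013-03-01' and unit='widgets' -- generated 2013-04-18 14:30
# (the trailing values depend on the current UTC time)
```

In the actual change below, the same two-branch gsub is applied to every entry in `file_hash` rather than to a single string.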
lib/mobilize-hive/handlers/hive.rb CHANGED
@@ -1,56 +1,7 @@
  module Mobilize
  module Hive
- def Hive.config
- Base.config('hive')
- end
-
- def Hive.exec_path(cluster)
- Hive.clusters[cluster]['exec_path']
- end
-
- def Hive.output_db(cluster)
- Hive.clusters[cluster]['output_db']
- end
-
- def Hive.output_db_user(cluster)
- output_db_node = Hadoop.gateway_node(cluster)
- output_db_user = Ssh.host(output_db_node)['user']
- output_db_user
- end
-
- def Hive.clusters
- Hive.config['clusters']
- end
-
- def Hive.slot_ids(cluster)
- (1..Hive.clusters[cluster]['max_slots']).to_a.map{|s| "#{cluster}_#{s.to_s}"}
- end
-
- def Hive.slot_worker_by_cluster_and_path(cluster,path)
- working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['hive_slot'];rescue;nil;end}.compact.uniq
- Hive.slot_ids(cluster).each do |slot_id|
- unless working_slots.include?(slot_id)
- Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>slot_id})
- return slot_id
- end
- end
- #return false if none are available
- return false
- end
-
- def Hive.unslot_worker_by_path(path)
- begin
- Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>nil})
- return true
- rescue
- return false
- end
- end
-
- def Hive.databases(cluster,user_name)
- Hive.run(cluster,"show databases",user_name)['stdout'].split("\n")
- end
-
+ #adds convenience methods
+ require "#{File.dirname(__FILE__)}/../helpers/hive_helper"
  # converts a source path or target path to a dst in the context of handler and stage
  def Hive.path_to_dst(path,stage_path,gdrive_slot)
  has_handler = true if path.index("://")
@@ -142,12 +93,25 @@ module Mobilize
  end
 
  #run a generic hive command, with the option of passing a file hash to be locally available
- def Hive.run(cluster,hql,user_name,file_hash=nil)
+ def Hive.run(cluster,hql,user_name,params=nil,file_hash=nil)
  # no TempStatsStore
  hql = "set hive.stats.autogather=false;#{hql}"
  filename = hql.to_md5
  file_hash||= {}
  file_hash[filename] = hql
+ #add in default params
+ params ||= {}
+ params = params.merge(Hive.default_params)
+ #replace any params in the file_hash and command
+ params.each do |k,v|
+ file_hash.each do |name,data|
+ if k.starts_with?("$")
+ data.gsub!(k,v)
+ else
+ data.gsub!("@#{k}",v)
+ end
+ end
+ end
  #silent mode so we don't have logs in stderr; clip output
  #at hadoop read limit
  command = "#{Hive.exec_path(cluster)} -S -f #{filename} | head -c #{Hadoop.read_limit}"
@@ -192,7 +156,8 @@ module Mobilize
 
  #check for select at end
  hql_array = hql.split(";").map{|hc| hc.strip}.reject{|hc| hc.length==0}
- if hql_array.last.downcase.starts_with?("select")
+ last_statement = hql_array.last.downcase.split("\n").reject{|l| l.starts_with?("-- ")}.first
+ if last_statement.to_s.starts_with?("select")
  #nil if no prior commands
  prior_hql = hql_array[0..-2].join(";") if hql_array.length > 1
  select_hql = hql_array.last
@@ -200,10 +165,10 @@ module Mobilize
  "drop table if exists #{output_path}",
  "create table #{output_path} as #{select_hql};"].join(";")
  full_hql = [prior_hql, output_table_hql].compact.join(";")
- result = Hive.run(cluster,full_hql, user_name)
+ result = Hive.run(cluster,full_hql, user_name,params['params'])
  Dataset.find_or_create_by_url(out_url)
  else
- result = Hive.run(cluster, hql, user_name)
+ result = Hive.run(cluster, hql, user_name,params['params'])
  Dataset.find_or_create_by_url(out_url)
  Dataset.write_by_url(out_url,result['stdout'],user_name) if result['stdout'].to_s.length>0
  end
@@ -244,9 +209,10 @@ module Mobilize
  schema_hash
  end
 
- def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop=false, schema_hash=nil)
+ def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop=false, schema_hash=nil, params=nil)
  table_path = [db,table].join(".")
  table_stats = Hive.table_stats(cluster, db, table, user_name)
+ url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
 
  source_hql_array = source_hql.split(";")
  last_select_i = source_hql_array.rindex{|hql| hql.downcase.strip.starts_with?("select")}
@@ -262,7 +228,8 @@ module Mobilize
  temp_set_hql = "set mapred.job.name=#{job_name} (temp table);"
  temp_drop_hql = "drop table if exists #{temp_table_path};"
  temp_create_hql = "#{temp_set_hql}#{prior_hql}#{temp_drop_hql}create table #{temp_table_path} as #{last_select_hql}"
- Hive.run(cluster,temp_create_hql,user_name)
+ response = Hive.run(cluster,temp_create_hql,user_name,params)
+ raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
 
  source_table_stats = Hive.table_stats(cluster,temp_db,temp_table_name,user_name)
  source_fields = source_table_stats['field_defs']
@@ -300,10 +267,12 @@ module Mobilize
  target_insert_hql,
  temp_drop_hql].join
 
- Hive.run(cluster, target_full_hql, user_name)
+ response = Hive.run(cluster, target_full_hql, user_name, params)
+
+ raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
 
  elsif part_array.length > 0 and
- table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']} == part_array}
+ table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']}.sort == part_array.sort}
  #partitions and no target table or same partitions in both target table and user params
 
  target_headers = source_fields.map{|f| f['name']}.reject{|h| part_array.include?(h)}
@@ -350,9 +319,17 @@ module Mobilize
  part_set_hql = "set hive.cli.print.header=true;set mapred.job.name=#{job_name} (permutations);"
  part_select_hql = "select distinct #{target_part_stmt} from #{temp_table_path};"
  part_perm_hql = part_set_hql + part_select_hql
- part_perm_tsv = Hive.run(cluster, part_perm_hql, user_name)['stdout']
+ response = Hive.run(cluster, part_perm_hql, user_name, params)
+ raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
+ part_perm_tsv = response['stdout']
  #having gotten the permutations, ensure they are dropped
  part_hash_array = part_perm_tsv.tsv_to_hash_array
+ #make sure there is data
+ if part_hash_array.first.nil? or part_hash_array.first.values.include?(nil)
+ #blank result set, return url
+ return url
+ end
+
  part_drop_hql = part_hash_array.map do |h|
  part_drop_stmt = h.map do |name,value|
  part_defs[name[1..-2]]=="string" ? "#{name}='#{value}'" : "#{name}=#{value}"
@@ -368,12 +345,12 @@ module Mobilize
 
  target_full_hql = [target_set_hql, target_create_hql, target_insert_hql, temp_drop_hql].join
 
- Hive.run(cluster, target_full_hql, user_name)
+ response = Hive.run(cluster, target_full_hql, user_name, params)
+ raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
  else
  error_msg = "Incompatible partition specs"
  raise error_msg
  end
- url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
  return url
  end
 
@@ -381,6 +358,12 @@ module Mobilize
  #Accepts options to drop existing target if any
  #also schema with column datatype overrides
  def Hive.tsv_to_table(cluster, db, table, part_array, source_tsv, user_name, drop=false, schema_hash=nil)
+ return nil if source_tsv.strip.length==0
+ if source_tsv.index("\r\n")
+ source_tsv = source_tsv.gsub("\r\n","\n")
+ elsif source_tsv.index("\r")
+ source_tsv = source_tsv.gsub("\r","\n")
+ end
  source_headers = source_tsv.tsv_header_array
 
  table_path = [db,table].join(".")
@@ -388,6 +371,8 @@ module Mobilize
 
  schema_hash ||= {}
 
+ url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
+
  if part_array.length == 0 and
  table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].nil?}
  #no partitions in either user params or the target table
@@ -414,10 +399,11 @@ module Mobilize
 
  target_full_hql = [target_drop_hql,target_create_hql,target_insert_hql].join(";")
 
- Hive.run(cluster, target_full_hql, user_name, file_hash)
+ response = Hive.run(cluster, target_full_hql, user_name, nil, file_hash)
+ raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
 
  elsif part_array.length > 0 and
- table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']} == part_array}
+ table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']}.sort == part_array.sort}
  #partitions and no target table
  #or same partitions in both target table and user params
  #or drop and start fresh
@@ -441,13 +427,17 @@ module Mobilize
  "partitioned by #{partition_defs}"
 
  #create target table early if not here
- Hive.run(cluster, target_create_hql, user_name)
+ response = Hive.run(cluster, target_create_hql, user_name)
+ raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
+
+ #return url (operation complete) if there's no data
+ source_hash_array = source_tsv.tsv_to_hash_array
+ return url if source_hash_array.length==1 and source_hash_array.first.values.compact.length==0
 
  table_stats = Hive.table_stats(cluster, db, table, user_name)
 
  #create data hash from source hash array
  data_hash = {}
- source_hash_array = source_tsv.tsv_to_hash_array
  source_hash_array.each do |ha|
  tpmk = part_array.map{|pn| "#{pn}=#{ha[pn]}"}.join("/")
  tpmv = ha.reject{|k,v| part_array.include?(k)}.values.join("\001")
@@ -480,7 +470,8 @@ module Mobilize
  #run actual partition adds all at once
  if target_part_hql.length>0
  puts "Adding partitions to #{cluster}/#{db}/#{table} for #{user_name} at #{Time.now.utc}"
- Hive.run(cluster, target_part_hql, user_name)
+ response = Hive.run(cluster, target_part_hql, user_name)
+ raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
  end
  else
  error_msg = "Incompatible partition specs: " +
@@ -488,7 +479,7 @@ module Mobilize
  "user_params:#{part_array.to_s}"
  raise error_msg
  end
- url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
+
  return url
  end
 
@@ -525,11 +516,11 @@ module Mobilize
  #source table
  cluster,source_path = source.path.split("/").ie{|sp| [sp.first, sp[1..-1].join(".")]}
  source_hql = "select * from #{source_path};"
- elsif ['gsheet','gridfs','hdfs'].include?(source.handler)
+ elsif ['gsheet','gfile','gridfs','hdfs'].include?(source.handler)
  if source.path.ie{|sdp| sdp.index(/\.[A-Za-z]ql$/) or sdp.ends_with?(".ql")}
  source_hql = source.read(user_name,gdrive_slot)
  else
- #tsv from sheet
+ #tsv from sheet or file
  source_tsv = source.read(user_name,gdrive_slot)
  end
  end
@@ -554,6 +545,8 @@ module Mobilize
  Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop, schema_hash)
  elsif source_tsv
  Hive.tsv_to_table(cluster, db, table, part_array, source_tsv, user_name, drop, schema_hash)
+ elsif source
+ #null sheet
  else
  raise "Unable to determine source tsv or source hql"
  end
@@ -580,11 +573,8 @@ module Mobilize
  select_hql = "select * from #{source_path};"
  hql = [set_hql,select_hql].join
  response = Hive.run(cluster, hql,user_name)
- if response['exit_code']==0
- return response['stdout']
- else
- raise "Unable to read hive://#{dst_path} with error: #{response['stderr']}"
- end
+ raise "Unable to read hive://#{dst_path} with error: #{response['stderr']}" if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
+ return response['stdout']
  end
 
  def Hive.write_by_dataset_path(dst_path,source_tsv,user_name,*args)
lib/mobilize-hive/helpers/hive_helper.rb ADDED
@@ -0,0 +1,63 @@
+ module Mobilize
+ module Hive
+ def self.config
+ Base.config('hive')
+ end
+
+ def self.exec_path(cluster)
+ self.clusters[cluster]['exec_path']
+ end
+
+ def self.output_db(cluster)
+ self.clusters[cluster]['output_db']
+ end
+
+ def self.output_db_user(cluster)
+ output_db_node = Hadoop.gateway_node(cluster)
+ output_db_user = Ssh.host(output_db_node)['user']
+ output_db_user
+ end
+
+ def self.clusters
+ self.config['clusters']
+ end
+
+ def self.slot_ids(cluster)
+ (1..self.clusters[cluster]['max_slots']).to_a.map{|s| "#{cluster}_#{s.to_s}"}
+ end
+
+ def self.slot_worker_by_cluster_and_path(cluster,path)
+ working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['hive_slot'];rescue;nil;end}.compact.uniq
+ self.slot_ids(cluster).each do |slot_id|
+ unless working_slots.include?(slot_id)
+ Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>slot_id})
+ return slot_id
+ end
+ end
+ #return false if none are available
+ return false
+ end
+
+ def self.unslot_worker_by_path(path)
+ begin
+ Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>nil})
+ return true
+ rescue
+ return false
+ end
+ end
+
+ def self.databases(cluster,user_name)
+ self.run(cluster,"show databases",user_name)['stdout'].split("\n")
+ end
+
+ def self.default_params
+ time = Time.now.utc
+ {
+ '$utc_date'=>time.strftime("%Y-%m-%d"),
+ '$utc_time'=>time.strftime("%H:%M"),
+ }
+ end
+ end
+ end
+
lib/mobilize-hive/version.rb CHANGED
@@ -1,5 +1,5 @@
  module Mobilize
  module Hive
- VERSION = "1.29"
+ VERSION = "1.31"
  end
  end
mobilize-hive.gemspec CHANGED
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
- gem.add_runtime_dependency "mobilize-hdfs","1.29"
+ gem.add_runtime_dependency "mobilize-hdfs","1.31"
  end
test/hive_job_rows.yml CHANGED
@@ -20,7 +20,15 @@
  active: true
  trigger: after hive_test_2
  status: ""
- stage1: hive.run hql:"select act_date as `date`,product,category,value from mobilize.hive_test_1;"
+ stage1: hive.run hql:"select '@date' as `date`,product,category,value from mobilize.hive_test_1;", params:{'date':'2013-01-01'}
  stage2: hive.write source:"stage1",target:"mobilize/hive_test_3", partitions:"date/product", drop:true
  stage3: hive.write hql:"select * from mobilize.hive_test_3;",target:"mobilize/hive_test_3", partitions:"date/product", drop:false
  stage4: gsheet.write source:"hive://mobilize/hive_test_3", target:"hive_test_3.out"
+ - name: hive_test_4
+ active: true
+ trigger: after hive_test_3
+ status: ""
+ stage1: hive.write source:"hive_test_4_stage_1.in", target:"mobilize/hive_test_1", partitions:"act_date"
+ stage2: hive.write source:"hive_test_4_stage_2.in", target:"mobilize/hive_test_1", partitions:"act_date"
+ stage3: hive.run hql:"select '$utc_date $utc_time' as `date_time`,product,category,value from mobilize.hive_test_1;"
+ stage4: gsheet.write source:stage3, target:"hive_test_4.out"
@@ -25,6 +25,18 @@ describe "Mobilize" do
  hive_1_in_tsv = YAML.load_file("#{Mobilize::Base.root}/test/hive_test_1_in.yml").hash_array_to_tsv
  hive_1_in_sheet.write(hive_1_in_tsv,Mobilize::Gdrive.owner_name)
 
+ #create blank sheet
+ hive_4_stage_1_in_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_1.in",gdrive_slot)
+ [hive_4_stage_1_in_sheet].each {|s| s.delete if s}
+ hive_4_stage_1_in_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_1.in",gdrive_slot)
+
+ #create sheet w just headers
+ hive_4_stage_2_in_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_2.in",gdrive_slot)
+ [hive_4_stage_2_in_sheet].each {|s| s.delete if s}
+ hive_4_stage_2_in_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_2.in",gdrive_slot)
+ hive_4_stage_2_in_sheet_header = hive_1_in_tsv.tsv_header_array.join("\t")
+ hive_4_stage_2_in_sheet.write(hive_4_stage_2_in_sheet_header,Mobilize::Gdrive.owner_name)
+
  hive_1_schema_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1.schema",gdrive_slot)
  [hive_1_schema_sheet].each {|s| s.delete if s}
  hive_1_schema_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1.schema",gdrive_slot)
@@ -51,21 +63,25 @@ describe "Mobilize" do
  [hive_2_target_sheet].each{|s| s.delete if s}
  hive_3_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_3.out",gdrive_slot)
  [hive_3_target_sheet].each{|s| s.delete if s}
+ hive_4_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4.out",gdrive_slot)
+ [hive_4_target_sheet].each{|s| s.delete if s}
 
  puts "job row added, force enqueued requestor, wait for stages"
  r.enqueue!
- wait_for_stages(1200)
+ wait_for_stages(2100)
 
  puts "jobtracker posted data to test sheet"
  hive_1_stage_2_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1_stage_2.out",gdrive_slot)
  hive_1_stage_3_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1_stage_3.out",gdrive_slot)
  hive_2_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_2.out",gdrive_slot)
  hive_3_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_3.out",gdrive_slot)
+ hive_4_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4.out",gdrive_slot)
 
  assert hive_1_stage_2_target_sheet.read(u.name).length == 219
  assert hive_1_stage_3_target_sheet.read(u.name).length > 3
  assert hive_2_target_sheet.read(u.name).length == 599
  assert hive_3_target_sheet.read(u.name).length == 347
+ assert hive_4_target_sheet.read(u.name).length == 432
  end
 
  def wait_for_stages(time_limit=600,stage_limit=120,wait_length=10)
metadata CHANGED
@@ -1,29 +1,32 @@
  --- !ruby/object:Gem::Specification
  name: mobilize-hive
  version: !ruby/object:Gem::Version
- version: '1.29'
+ version: '1.31'
+ prerelease:
  platform: ruby
  authors:
  - Cassio Paes-Leme
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-03-27 00:00:00.000000000 Z
+ date: 2013-04-18 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: mobilize-hdfs
  requirement: !ruby/object:Gem::Requirement
+ none: false
  requirements:
  - - '='
  - !ruby/object:Gem::Version
- version: '1.29'
+ version: '1.31'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
+ none: false
  requirements:
  - - '='
  - !ruby/object:Gem::Version
- version: '1.29'
+ version: '1.31'
  description: Adds hive read, write, and run support to mobilize-hdfs
  email:
  - cpaesleme@dena.com
@@ -38,6 +41,7 @@ files:
  - Rakefile
  - lib/mobilize-hive.rb
  - lib/mobilize-hive/handlers/hive.rb
+ - lib/mobilize-hive/helpers/hive_helper.rb
  - lib/mobilize-hive/tasks.rb
  - lib/mobilize-hive/version.rb
  - lib/samples/hive.yml
@@ -51,26 +55,33 @@ files:
  - test/test_helper.rb
  homepage: http://github.com/dena/mobilize-hive
  licenses: []
- metadata: {}
  post_install_message:
  rdoc_options: []
  require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
+ none: false
  requirements:
- - - '>='
+ - - ! '>='
  - !ruby/object:Gem::Version
  version: '0'
+ segments:
+ - 0
+ hash: -4285752485316531029
  required_rubygems_version: !ruby/object:Gem::Requirement
+ none: false
  requirements:
- - - '>='
+ - - ! '>='
  - !ruby/object:Gem::Version
  version: '0'
+ segments:
+ - 0
+ hash: -4285752485316531029
  requirements: []
  rubyforge_project:
- rubygems_version: 2.0.3
+ rubygems_version: 1.8.25
  signing_key:
- specification_version: 4
+ specification_version: 3
  summary: Adds hive read, write, and run support to mobilize-hdfs
  test_files:
  - test/hive_job_rows.yml
checksums.yaml DELETED
@@ -1,7 +0,0 @@
- ---
- SHA1:
- metadata.gz: a7bf2935cac4914e2530e45a969942a5ae856e1c
- data.tar.gz: 4b5b751411661d78e1ce3e4c65a8e979ffe3318b
- SHA512:
- metadata.gz: b1a7f94de8452cb8aecdaba6e33b20dfeea208f86e046a8f9b48e2387758ef6fda9a74773775d1d7b7fe2e5631190d4958327fd747d526b4f2381c379f9a8b8d
- data.tar.gz: 2a3e60b51db89a7e43ae465d9d0853f4cd875d9590b91d4b51ac7211debb7ea79c87b906a25eea1fbd8e4080ee60dac926dae3c69bf81f848d67c63b85cff407