humboldt 1.1.0-java → 1.1.1-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1b68b1b122d8e69538a1ed888cd8ede41e07a4a7
4
- data.tar.gz: b658d3c93c062116ecec9c1b1f3b09fb26b5ff49
3
+ metadata.gz: 6a97f4f665fc9cd31b2243197be7f0486ce1d078
4
+ data.tar.gz: 5cc3932bfdba9f35c13db5f9ecbc566129887ba9
5
5
  SHA512:
6
- metadata.gz: 17780975cfc99cb6f4f4639d23dc3c3336085c5343e37b37841eb7d8eacfc49d7963d5f217d90f9495f4ed8aed1ac36e45419e79d7dddc1cce93aa87b08621c6
7
- data.tar.gz: 8b79f3d82f416d5180ff657ca01de80b6370b745a6587aec44efcdfb4918196d0d0b61c84f46ca3fbb4ca0d10e249c5cbc1cf64e241aaa7b3c47771ef76dd43c
6
+ metadata.gz: c66f66dc44da611770ebeec44e3c1ecb16e17207fee63db3ae0cc9d36704e860fead33cf1dbefe5f8dce4c628d08470ee8ec330c2e8468ae455b35100c1ae8ba
7
+ data.tar.gz: 20306297025d1bc59a034c9e8f166ff30e682ccca731e8a36df2e2b785645bbe0c227858c99cee69ed0dad67cf3dc615107f0f5dfdb3d561ba64b41df50687f0
@@ -109,7 +109,7 @@ module Rubydoop
109
109
  # for example when you don't know how wide the part of the key that you want
110
110
  # to use is. In the example above if you use the domain to identify sites these
111
111
  # can be of different lengths. If your visitor IDs are 20 characters you can
112
- # use 0 and -20 as your indexes.
112
+ # use 0 and -21 as your indexes.
113
113
  #
114
114
  # @param [Fixnum] start_index The first index of the slice, negative numbers
115
115
  # are counted from the end
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'thor'
4
4
  require 'aws'
5
- require 'open3'
5
+ require 'rubydoop/version'
6
6
  require 'rubydoop/package' # this prints an annoying warning in JRuby 1.7.0.RC1
7
7
  require 'humboldt/emr_flow'
8
8
  require 'humboldt/hadoop_status_filter'
@@ -204,23 +204,24 @@ module Humboldt
204
204
 
205
205
  def run_command(*args)
206
206
  say_status(:running, 'Hadoop started')
207
- Open3.popen3(*args) do |stdin, stdout, stderr, wait_thr|
208
- stdin.close
209
- stdout_printer = Thread.new(stdout) do |stdout|
210
- while line = stdout.gets
211
- say(line.chomp)
207
+ HadoopStatusFilter.run_command_with_filtering(*args) do |type, *args|
208
+ case type
209
+ when :stderr
210
+ unless options.silent?
211
+ say(args.first, :red)
212
212
  end
213
- end
214
- stderr_printer = Thread.new(stderr) do |stderr|
215
- filter = HadoopStatusFilter.new(stderr, self, options.silent?)
216
- filter.run
217
- end
218
- stdout_printer.join
219
- stderr_printer.join
220
- if wait_thr.value.exitstatus == 0
221
- say_status(:done, 'Job completed')
213
+ when :status
214
+ say(args[0], args[1] == :error ? :red : :yellow)
215
+ when :counters
216
+ say
217
+ print_table(args.first)
218
+ say
219
+ when :done
220
+ say('Job completed')
221
+ when :failed
222
+ say('Job failed', :red)
222
223
  else
223
- say_status(:failed, 'Job failed', :red)
224
+ say(args.first)
224
225
  end
225
226
  end
226
227
  end
@@ -1,14 +1,38 @@
1
1
  # encoding: utf-8
2
2
 
3
+ require 'open3'
4
+
3
5
  module Humboldt
6
+ # @private
4
7
  class HadoopStatusFilter
5
- def initialize(hadoop_stderr, shell, silent)
8
+ def initialize(hadoop_stderr, listener)
6
9
  @hadoop_stderr = hadoop_stderr
7
- @shell = shell
8
- @silent = silent
10
+ @listener = listener
9
11
  @counters = {}
10
12
  end
11
13
 
14
+ def self.run_command_with_filtering(*args, &listener)
15
+ Open3.popen3(*args) do |stdin, stdout, stderr, wait_thr|
16
+ stdin.close
17
+ stdout_printer = Thread.new(stdout) do |stdout|
18
+ while line = stdout.gets
19
+ listener.call(:stdout, line.chomp)
20
+ end
21
+ end
22
+ stderr_printer = Thread.new(stderr) do |stderr|
23
+ filter = new(stderr, listener)
24
+ filter.run
25
+ end
26
+ stdout_printer.join
27
+ stderr_printer.join
28
+ if wait_thr.value.exitstatus == 0
29
+ listener.call(:done)
30
+ else
31
+ listener.call(:failed)
32
+ end
33
+ end
34
+ end
35
+
12
36
  def run
13
37
  counter_group = nil
14
38
  while line = @hadoop_stderr.gets
@@ -46,7 +70,7 @@ module Humboldt
46
70
  end
47
71
  end
48
72
  end
49
- @shell.say(line.chomp, :red) unless @silent
73
+ @listener.call(:stderr, line.chomp)
50
74
  end
51
75
  print_counters_table
52
76
  end
@@ -54,7 +78,7 @@ module Humboldt
54
78
  private
55
79
 
56
80
  def hadoop_log?(line)
57
- line =~ /(?:INFO|WARN) (?:mapred|input|output|util|jvm|mapreduce)\./
81
+ line =~ /(?:INFO|WARN) (?:mapred|input|output|util|jvm|mapreduce|compress|reduce)\./
58
82
  end
59
83
 
60
84
  def ignore?(line)
@@ -73,11 +97,11 @@ module Humboldt
73
97
  end
74
98
 
75
99
  def report_progress(map, reduce)
76
- @shell.say_status(:progress, "map #{map}%, reduce #{reduce}%")
100
+ @listener.call(:progress, "map #{map}%, reduce #{reduce}%")
77
101
  end
78
102
 
79
103
  def report_error(line)
80
- @shell.say_status(@error_type, line.chomp, @error_type == :error ? :red : :yellow)
104
+ @listener.call(:status, line.chomp, @error_type)
81
105
  end
82
106
 
83
107
  def print_counters_table
@@ -89,9 +113,7 @@ module Humboldt
89
113
  ]
90
114
  end
91
115
  table.pop
92
- @shell.say
93
- @shell.print_table(table)
94
- @shell.say
116
+ @listener.call(:counters, table)
95
117
  end
96
118
  end
97
119
  end
@@ -83,7 +83,7 @@ module RunnerHelpers
83
83
 
84
84
  def configuration
85
85
  @configuration ||= ::Hadoop::Conf::Configuration.new.tap do |config|
86
- config.set 'mapred.job.tracker', 'local'
86
+ config.set('mapreduce.framework.name', 'local')
87
87
  end
88
88
  end
89
89
 
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  module Humboldt
4
- VERSION = '1.1.0'.freeze
4
+ VERSION = '1.1.1'.freeze
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: humboldt
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: java
6
6
  authors:
7
7
  - The Burt Platform Team
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-01 00:00:00.000000000 Z
11
+ date: 2015-10-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement