humboldt 1.1.0-java → 1.1.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1b68b1b122d8e69538a1ed888cd8ede41e07a4a7
4
- data.tar.gz: b658d3c93c062116ecec9c1b1f3b09fb26b5ff49
3
+ metadata.gz: 6a97f4f665fc9cd31b2243197be7f0486ce1d078
4
+ data.tar.gz: 5cc3932bfdba9f35c13db5f9ecbc566129887ba9
5
5
  SHA512:
6
- metadata.gz: 17780975cfc99cb6f4f4639d23dc3c3336085c5343e37b37841eb7d8eacfc49d7963d5f217d90f9495f4ed8aed1ac36e45419e79d7dddc1cce93aa87b08621c6
7
- data.tar.gz: 8b79f3d82f416d5180ff657ca01de80b6370b745a6587aec44efcdfb4918196d0d0b61c84f46ca3fbb4ca0d10e249c5cbc1cf64e241aaa7b3c47771ef76dd43c
6
+ metadata.gz: c66f66dc44da611770ebeec44e3c1ecb16e17207fee63db3ae0cc9d36704e860fead33cf1dbefe5f8dce4c628d08470ee8ec330c2e8468ae455b35100c1ae8ba
7
+ data.tar.gz: 20306297025d1bc59a034c9e8f166ff30e682ccca731e8a36df2e2b785645bbe0c227858c99cee69ed0dad67cf3dc615107f0f5dfdb3d561ba64b41df50687f0
@@ -109,7 +109,7 @@ module Rubydoop
109
109
  # for example when you don't know how wide the part of the key that you want
110
110
  # to use is. In the example above, if you use the domain to identify sites these
111
111
  # can be of different length. If your visitor IDs are 20 characters you can
112
- # use 0 and -20 as your indexes.
112
+ # use 0 and -21 as your indexes.
113
113
  #
114
114
  # @param [Fixnum] start_index The first index of the slice, negative numbers
115
115
  # are counted from the end
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'thor'
4
4
  require 'aws'
5
- require 'open3'
5
+ require 'rubydoop/version'
6
6
  require 'rubydoop/package' # this prints an annoying warning in JRuby 1.7.0.RC1
7
7
  require 'humboldt/emr_flow'
8
8
  require 'humboldt/hadoop_status_filter'
@@ -204,23 +204,24 @@ module Humboldt
204
204
 
205
205
  def run_command(*args)
206
206
  say_status(:running, 'Hadoop started')
207
- Open3.popen3(*args) do |stdin, stdout, stderr, wait_thr|
208
- stdin.close
209
- stdout_printer = Thread.new(stdout) do |stdout|
210
- while line = stdout.gets
211
- say(line.chomp)
207
+ HadoopStatusFilter.run_command_with_filtering(*args) do |type, *args|
208
+ case type
209
+ when :stderr
210
+ unless options.silent?
211
+ say(args.first, :red)
212
212
  end
213
- end
214
- stderr_printer = Thread.new(stderr) do |stderr|
215
- filter = HadoopStatusFilter.new(stderr, self, options.silent?)
216
- filter.run
217
- end
218
- stdout_printer.join
219
- stderr_printer.join
220
- if wait_thr.value.exitstatus == 0
221
- say_status(:done, 'Job completed')
213
+ when :status
214
+ say(args[0], args[1] == :error ? :red : :yellow)
215
+ when :counters
216
+ say
217
+ print_table(args.first)
218
+ say
219
+ when :done
220
+ say('Job completed')
221
+ when :failed
222
+ say('Job failed', :red)
222
223
  else
223
- say_status(:failed, 'Job failed', :red)
224
+ say(args.first)
224
225
  end
225
226
  end
226
227
  end
@@ -1,14 +1,38 @@
1
1
  # encoding: utf-8
2
2
 
3
+ require 'open3'
4
+
3
5
  module Humboldt
6
+ # @private
4
7
  class HadoopStatusFilter
5
- def initialize(hadoop_stderr, shell, silent)
8
+ def initialize(hadoop_stderr, listener)
6
9
  @hadoop_stderr = hadoop_stderr
7
- @shell = shell
8
- @silent = silent
10
+ @listener = listener
9
11
  @counters = {}
10
12
  end
11
13
 
14
+ def self.run_command_with_filtering(*args, &listener)
15
+ Open3.popen3(*args) do |stdin, stdout, stderr, wait_thr|
16
+ stdin.close
17
+ stdout_printer = Thread.new(stdout) do |stdout|
18
+ while line = stdout.gets
19
+ listener.call(:stdout, line.chomp)
20
+ end
21
+ end
22
+ stderr_printer = Thread.new(stderr) do |stderr|
23
+ filter = new(stderr, listener)
24
+ filter.run
25
+ end
26
+ stdout_printer.join
27
+ stderr_printer.join
28
+ if wait_thr.value.exitstatus == 0
29
+ listener.call(:done)
30
+ else
31
+ listener.call(:failed)
32
+ end
33
+ end
34
+ end
35
+
12
36
  def run
13
37
  counter_group = nil
14
38
  while line = @hadoop_stderr.gets
@@ -46,7 +70,7 @@ module Humboldt
46
70
  end
47
71
  end
48
72
  end
49
- @shell.say(line.chomp, :red) unless @silent
73
+ @listener.call(:stderr, line.chomp)
50
74
  end
51
75
  print_counters_table
52
76
  end
@@ -54,7 +78,7 @@ module Humboldt
54
78
  private
55
79
 
56
80
  def hadoop_log?(line)
57
- line =~ /(?:INFO|WARN) (?:mapred|input|output|util|jvm|mapreduce)\./
81
+ line =~ /(?:INFO|WARN) (?:mapred|input|output|util|jvm|mapreduce|compress|reduce)\./
58
82
  end
59
83
 
60
84
  def ignore?(line)
@@ -73,11 +97,11 @@ module Humboldt
73
97
  end
74
98
 
75
99
  def report_progress(map, reduce)
76
- @shell.say_status(:progress, "map #{map}%, reduce #{reduce}%")
100
+ @listener.call(:progress, "map #{map}%, reduce #{reduce}%")
77
101
  end
78
102
 
79
103
  def report_error(line)
80
- @shell.say_status(@error_type, line.chomp, @error_type == :error ? :red : :yellow)
104
+ @listener.call(:status, line.chomp, @error_type)
81
105
  end
82
106
 
83
107
  def print_counters_table
@@ -89,9 +113,7 @@ module Humboldt
89
113
  ]
90
114
  end
91
115
  table.pop
92
- @shell.say
93
- @shell.print_table(table)
94
- @shell.say
116
+ @listener.call(:counters, table)
95
117
  end
96
118
  end
97
119
  end
@@ -83,7 +83,7 @@ module RunnerHelpers
83
83
 
84
84
  def configuration
85
85
  @configuration ||= ::Hadoop::Conf::Configuration.new.tap do |config|
86
- config.set 'mapred.job.tracker', 'local'
86
+ config.set('mapreduce.framework.name', 'local')
87
87
  end
88
88
  end
89
89
 
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  module Humboldt
4
- VERSION = '1.1.0'.freeze
4
+ VERSION = '1.1.1'.freeze
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: humboldt
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: java
6
6
  authors:
7
7
  - The Burt Platform Team
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-01 00:00:00.000000000 Z
11
+ date: 2015-10-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement