hadupils 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md CHANGED
@@ -30,3 +30,11 @@
30
30
  parameter (custom support for pre-1.9 ruby, the Kernel.system call
31
31
  of which does not handle such things)
32
32
 
33
+ ### 0.3.0
34
+
35
+ * Introduced Hadupils::Extensions::FlatArchivePath
36
+ * The "hive" command uses a FlatArchivePath for hadoop-ext instead
37
+ of Flat, so PATH will include bin dirs of any hadoop-ext archives
38
+ when a streaming query runs.
39
+ * Some misc. utility functions in Hadupils::Util for reading tarballs.
40
+
@@ -25,7 +25,7 @@ module Hadupils::Commands
25
25
 
26
26
  module HadoopExt
27
27
  def hadoop_ext
28
- @hadoop_ext ||= Hadupils::Extensions::Flat.new(Hadupils::Search.hadoop_assets)
28
+ @hadoop_ext ||= Hadupils::Extensions::FlatArchivePath.new(Hadupils::Search.hadoop_assets)
29
29
  end
30
30
  end
31
31
 
@@ -129,8 +129,12 @@ module Hadupils::Extensions
129
129
  [@hiverc]
130
130
  end
131
131
 
132
# Starting list of items from which the hiverc is assembled.
#
# Returns a shallow copy of @assets rather than @assets itself, so a
# caller may append to the result without modifying this extension's
# own asset list.
def default_hiverc_items
  @assets.dup
end
135
+
132
136
  def assemble_hiverc
133
- assets = @assets
137
+ assets = default_hiverc_items
134
138
  if @hiverc_block
135
139
  assets = @hiverc_block.call(assets.dup)
136
140
  end
@@ -140,6 +144,36 @@ module Hadupils::Extensions
140
144
  end
141
145
  end
142
146
 
147
# A Flat extension that additionally emits a hiverc line putting the
# "bin" directory of each qualifying archive asset onto PATH via
# mapred.child.env.
class FlatArchivePath < Flat
  # Builds the hiverc statement that prepends each archive's bin
  # directory to PATH.
  #
  # archives:: objects responding to +name+
  #
  # Returns the "SET mapred.child.env = PATH=...:$PATH;" line as a String.
  # NOTE(review): "$(pwd)" and "$PATH" are emitted literally here;
  # presumably they are expanded where the child task runs -- confirm.
  def self.assemble_path_env(archives)
    bin_dirs = archives.map { |archive| "$(pwd)/#{archive.name}/bin" }
    "SET mapred.child.env = PATH=#{bin_dirs.join(':')}:$PATH;"
  end

  # Selects the assets that are Hadupils::Assets::Archive instances and
  # whose tarball contains a "bin" directory.
  #
  # Any StandardError raised while inspecting an archive is swallowed and
  # that archive is simply left out of the result.
  def archives_for_path_env
    @assets.find_all do |asset|
      next false unless asset.kind_of?(Hadupils::Assets::Archive)

      begin
        Hadupils::Util.archive_has_directory?(asset.path, 'bin')
      rescue
        false
      end
    end
  end

  # Extends the inherited item list with the PATH-setting line, but only
  # when at least one archive qualifies.
  def default_hiverc_items
    entries = super
    bin_archives = archives_for_path_env
    entries << self.class.assemble_path_env(bin_archives) unless bin_archives.empty?
    entries
  end
end
176
+
143
177
  class Static < Base
144
178
  def self.gather_assets(path)
145
179
  []
@@ -0,0 +1,25 @@
1
module Hadupils
  # Helpers for inspecting gzipped tarballs without unpacking them.
  module Util
    # Opens the gzipped tarball at +archive_path+ and yields a
    # Gem::Package::TarReader for it, rewound to its starting position.
    #
    # The block forms of Zlib::GzipReader.open and
    # Gem::Package::TarReader.new are used, so both are closed once the
    # caller's block finishes.
    #
    # archive_path:: filesystem path of a gzip-compressed tar file
    def self.read_archive(archive_path)
      # Required here rather than at file load, so these libraries are
      # only pulled in the first time an archive is actually read.
      require 'rubygems/package'
      require 'zlib'
      Zlib::GzipReader.open(archive_path) do |gzip|
        Gem::Package::TarReader.new(gzip) do |tar|
          tar.rewind
          yield tar
        end
      end
    end

    # Reports whether the tarball at +archive_path+ contains +directory+.
    #
    # A directory counts as present when the archive has either
    # * a directory entry with that name, or
    # * any entry located beneath it (e.g. "bin/run"), since tarballs are
    #   not required to carry an explicit entry for parent directories.
    #
    # Entry names and +directory+ are compared after dropping any leading
    # "./" and trailing "/", so "bin", "bin/", "./bin" and "./bin/" are
    # all treated as the same directory.
    #
    # archive_path:: filesystem path of a gzip-compressed tar file
    # directory::    archive-relative directory name, with or without a
    #                trailing slash
    #
    # Returns true or false. Errors raised while opening or reading the
    # archive propagate to the caller.
    def self.archive_has_directory?(archive_path, directory)
      wanted = normalize_entry_name(directory)
      beneath = wanted + '/'
      found = false
      read_archive(archive_path) do |tar|
        # any? stops reading the archive at the first matching entry.
        found = tar.any? do |entry|
          raw = entry.full_name
          (entry.directory? && normalize_entry_name(raw) == wanted) ||
            raw.sub(%r{\A(?:\./)+}, '').start_with?(beneath)
        end
      end
      found
    end

    # Strips leading "./" segments and trailing slashes from an archive
    # member name so equivalent spellings compare equal.
    def self.normalize_entry_name(name)
      name.sub(%r{\A(?:\./)+}, '').sub(%r{/+\z}, '')
    end
    private_class_method :normalize_entry_name
  end
end
data/lib/hadupils.rb CHANGED
@@ -7,3 +7,4 @@ require 'hadupils/commands'
7
7
  require 'hadupils/extensions'
8
8
  require 'hadupils/runners'
9
9
  require 'hadupils/search'
10
+ require 'hadupils/util'
@@ -44,9 +44,9 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
44
44
  assert_equal result, @klass.run(params)
45
45
  end
46
46
 
47
- should 'have a Flat extension based on a search for hadoop-ext' do
47
+ should 'have a FlatArchivePath extension based on a search for hadoop-ext' do
48
48
  Hadupils::Search.expects(:hadoop_assets).with.returns(assets = mock())
49
- Hadupils::Extensions::Flat.expects(:new).with(assets).returns(extension = mock())
49
+ Hadupils::Extensions::FlatArchivePath.expects(:new).with(assets).returns(extension = mock())
50
50
  cmd = @klass.new
51
51
  assert_equal extension, cmd.hadoop_ext
52
52
  # This should cause failure if the previous result wasn't
@@ -231,6 +231,48 @@ class Hadupils::ExtensionsTest < Test::Unit::TestCase
231
231
  assert_equal expected, f.read
232
232
  end
233
233
  end
234
+
235
context 'ArchivePath extension' do
  setup do
    require 'rubygems/package'
    require 'zlib'
    @klass = Hadupils::Extensions::FlatArchivePath

    # Two gzipped tarballs, each holding one text file plus a "bin"
    # directory entry.
    @bin_archives = %w(one two).map do |name|
      @tempdir.file("zarchive-with-bin-#{name}.tar.gz") do |f|
        # The tar stream is built in memory first and then written
        # through the gzip writer in one go.
        tarball = StringIO.new('')
        Gem::Package::TarWriter.new(tarball) do |tar|
          tar.add_file("#{name}.txt", '644'.oct) { |io| io.write('blah') }
          tar.mkdir('bin', '755'.oct)
        end
        Zlib::GzipWriter.open(f) { |gz| gz.write tarball.string }
        f.path
      end
    end

    @bin_archive_lines = @bin_archives.map { |path| "ADD ARCHIVE #{path};" }.join("\n")
  end

  should 'produce a hiverc for the expected assets and archives with bins added to PATH' do
    last_line = 'SET last.line = thisone;'
    bin_dirs = @bin_archives.map { |p| "$(pwd)/#{::File.basename(p)}/bin" }
    path_prefix = bin_dirs.join(':')

    # Expected order: the asset lines, then the PATH line, then whatever
    # the hiverc block below appends.
    expected = "#{@file_line}\n#{@jar_line}\n#{@archive_line}\n"
    expected += "#{@bin_archive_lines}\n"
    expected += "SET mapred.child.env = PATH=#{path_prefix}:$PATH;\n"
    expected += "#{last_line}\n"

    ext = @klass.new(@tempdir.path) do
      hiverc do |items|
        items << last_line
      end
    end

    hivercs = ext.hivercs
    assert_kind_of Hadupils::Extensions::HiveRC::Dynamic, hivercs[0]
    assert_equal 1, hivercs.length
    ::File.open(hivercs[0].path, 'r') do |f|
      assert_equal expected, f.read
    end
  end
end
234
276
  end
235
277
  end
236
278
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hadupils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -88,6 +88,7 @@ files:
88
88
  - lib/hadupils/extensions/hive.rb
89
89
  - lib/hadupils/extensions.rb
90
90
  - lib/hadupils/assets.rb
91
+ - lib/hadupils/util.rb
91
92
  - lib/hadupils.rb
92
93
  - test/unit/assets_test.rb
93
94
  - test/unit/commands_test.rb