hadupils 0.2.0 → 0.3.0

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
data/CHANGELOG.md CHANGED
@@ -30,3 +30,11 @@
30
30
  parameter (custom support for pre-1.9 ruby, the Kernel.system call
31
31
  of which does not handle such things)
32
32
 
33
+ ### 0.3.0
34
+
35
+ * Introduced Hadupils::Extensions::FlatArchivePath
36
+ * The "hive" command uses a FlatArchivePath for hadoop-ext instead
37
+ of Flat, so PATH will include bin dirs of any hadoop-ext archives
38
+ when a streaming query runs.
39
+ * Some misc. utility functions in Hadupils::Util for reading tarballs.
40
+
@@ -25,7 +25,7 @@ module Hadupils::Commands
25
25
 
26
26
  module HadoopExt
27
27
  def hadoop_ext
28
- @hadoop_ext ||= Hadupils::Extensions::Flat.new(Hadupils::Search.hadoop_assets)
28
+ @hadoop_ext ||= Hadupils::Extensions::FlatArchivePath.new(Hadupils::Search.hadoop_assets)
29
29
  end
30
30
  end
31
31
 
@@ -129,8 +129,12 @@ module Hadupils::Extensions
129
129
  [@hiverc]
130
130
  end
131
131
 
132
+ def default_hiverc_items
133
+ @assets.dup
134
+ end
135
+
132
136
  def assemble_hiverc
133
- assets = @assets
137
+ assets = default_hiverc_items
134
138
  if @hiverc_block
135
139
  assets = @hiverc_block.call(assets.dup)
136
140
  end
@@ -140,6 +144,36 @@ module Hadupils::Extensions
140
144
  end
141
145
  end
142
146
 
147
+ class FlatArchivePath < Flat
148
+ def archives_for_path_env
149
+ @assets.find_all do |a|
150
+ if a.kind_of? Hadupils::Assets::Archive
151
+ begin
152
+ Hadupils::Util.archive_has_directory?(a.path, 'bin')
153
+ rescue
154
+ false
155
+ end
156
+ else
157
+ false
158
+ end
159
+ end
160
+ end
161
+
162
+ def default_hiverc_items
163
+ items = super
164
+ archs = archives_for_path_env
165
+ if archs.length > 0
166
+ items << self.class.assemble_path_env(archs)
167
+ end
168
+ items
169
+ end
170
+
171
+ def self.assemble_path_env(archives)
172
+ paths = archives.collect {|a| "$(pwd)/#{ a.name }/bin" }
173
+ "SET mapred.child.env = PATH=#{ paths.join(':') }:$PATH;"
174
+ end
175
+ end
176
+
143
177
  class Static < Base
144
178
  def self.gather_assets(path)
145
179
  []
@@ -0,0 +1,25 @@
1
+ module Hadupils::Util
2
+ def self.read_archive(archive_path)
3
+ require 'rubygems/package'
4
+ require 'zlib'
5
+ Zlib::GzipReader.open(archive_path) do |zlib|
6
+ Gem::Package::TarReader.new(zlib) do |tar|
7
+ tar.rewind
8
+ yield tar
9
+ end
10
+ end
11
+ end
12
+
13
+ def self.archive_has_directory?(archive_path, directory)
14
+ directory = directory + '/' unless directory.end_with?('/')
15
+ targets = [directory[0..-2], directory]
16
+ found = false
17
+ read_archive(archive_path) do |arch|
18
+ arch.each do |entry|
19
+ found = (entry.directory? and targets.include?(entry.full_name))
20
+ break if found
21
+ end
22
+ end
23
+ found
24
+ end
25
+ end
data/lib/hadupils.rb CHANGED
@@ -7,3 +7,4 @@ require 'hadupils/commands'
7
7
  require 'hadupils/extensions'
8
8
  require 'hadupils/runners'
9
9
  require 'hadupils/search'
10
+ require 'hadupils/util'
@@ -44,9 +44,9 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
44
44
  assert_equal result, @klass.run(params)
45
45
  end
46
46
 
47
- should 'have a Flat extension based on a search for hadoop-ext' do
47
+ should 'have a FlatArchivePath extension based on a search for hadoop-ext' do
48
48
  Hadupils::Search.expects(:hadoop_assets).with.returns(assets = mock())
49
- Hadupils::Extensions::Flat.expects(:new).with(assets).returns(extension = mock())
49
+ Hadupils::Extensions::FlatArchivePath.expects(:new).with(assets).returns(extension = mock())
50
50
  cmd = @klass.new
51
51
  assert_equal extension, cmd.hadoop_ext
52
52
  # This should cause failure if the previous result wasn't
@@ -231,6 +231,48 @@ class Hadupils::ExtensionsTest < Test::Unit::TestCase
231
231
  assert_equal expected, f.read
232
232
  end
233
233
  end
234
+
235
+ context 'ArchivePath extension' do
236
+ setup do
237
+ require 'rubygems/package'
238
+ require 'zlib'
239
+ @klass = Hadupils::Extensions::FlatArchivePath
240
+ @bin_archives = %w(one two).collect do |name|
241
+ @tempdir.file("zarchive-with-bin-#{name}.tar.gz") do |f|
242
+ Zlib::GzipWriter.open(f) do |zlib|
243
+ buffer = StringIO.new('')
244
+ Gem::Package::TarWriter.new(buffer) do |writer|
245
+ writer.add_file("#{name}.txt", '644'.oct) {|x| x.write('blah')}
246
+ writer.mkdir('bin', '755'.oct)
247
+ end
248
+ zlib.write buffer.string
249
+ end
250
+ f.path
251
+ end
252
+ end
253
+
254
+ @bin_archive_lines = @bin_archives.collect {|path| "ADD ARCHIVE #{path};"}.join("\n")
255
+ end
256
+
257
+ should 'produce a hiverc for the expected assets and archives with bins added to PATH' do
258
+ path_prefix = @bin_archives.collect {|p| "$(pwd)/#{::File.basename(p)}/bin" }.join(':')
259
+ expected = "#{@file_line}\n#{@jar_line}\n#{@archive_line}\n" +
260
+ "#{@bin_archive_lines}\n" +
261
+ "SET mapred.child.env = PATH=#{path_prefix}:$PATH;\n" +
262
+ (last_line = 'SET last.line = thisone;') + "\n"
263
+ ext = @klass.new(@tempdir.path) do
264
+ hiverc do |items|
265
+ items << last_line
266
+ end
267
+ end
268
+ hivercs = ext.hivercs
269
+ assert_kind_of Hadupils::Extensions::HiveRC::Dynamic, hivercs[0]
270
+ assert_equal 1, hivercs.length
271
+ ::File.open(hivercs[0].path, 'r') do |f|
272
+ assert_equal expected, f.read
273
+ end
274
+ end
275
+ end
234
276
  end
235
277
  end
236
278
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hadupils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -88,6 +88,7 @@ files:
88
88
  - lib/hadupils/extensions/hive.rb
89
89
  - lib/hadupils/extensions.rb
90
90
  - lib/hadupils/assets.rb
91
+ - lib/hadupils/util.rb
91
92
  - lib/hadupils.rb
92
93
  - test/unit/assets_test.rb
93
94
  - test/unit/commands_test.rb