hadupils 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md CHANGED
@@ -16,3 +16,17 @@
16
16
  * Fixed embarrassing misspelling of "shoulda-context" in gemspec
17
17
  development dependencies
18
18
 
19
+ ### 0.1.3
20
+
21
+ * Fixed compat. issue for ruby 1.8; downcase on symbol is a no-no.
22
+ In Hadupils::Commands module.
23
+
24
+ ### 0.2.0
25
+
26
+ * Introduced hive extensions (hive-ext)
27
+ * Hive command uses hive extensions to assemble hivercs
28
+ * Hive command assembles HIVE_AUX_JARS_PATH variable
29
+ * Base runner can handle environment variable hash as first command
30
+ parameter (custom support for pre-1.9 ruby, the Kernel.system call
31
+ of which does not handle such things)
32
+
@@ -35,16 +35,23 @@ module Hadupils::Commands
35
35
  end
36
36
  end
37
37
 
38
+ module HiveExt
39
+ def hive_ext
40
+ @hive_ext ||= Hadupils::Extensions::HiveSet.new(Hadupils::Search.hive_extensions)
41
+ end
42
+ end
43
+
38
44
  class Hive < SimpleCommand
39
45
  include HadoopExt
46
+ include HiveExt
40
47
  include UserConf
41
48
 
42
49
  def assemble_parameters(parameters)
43
- user_config.hivercs + hadoop_ext.hivercs + parameters
50
+ user_config.hivercs + hadoop_ext.hivercs + hive_ext.hivercs + parameters
44
51
  end
45
52
 
46
53
  def run(parameters)
47
- Hadupils::Runners::Hive.run assemble_parameters(parameters)
54
+ Hadupils::Runners::Hive.run assemble_parameters(parameters), hive_ext.hive_aux_jars_path
48
55
  end
49
56
  end
50
57
 
@@ -0,0 +1,249 @@
1
+ module Hadupils::Extensions
2
+
3
+ # Hive-targeted extensions derived from filesystem layout
4
+ #
5
+ # = Concept
6
+ #
7
+ # There are a few ways to "extend" one's hive session:
8
+ #
9
+ # * Adding files, archives, jars to it (+ADD ...+).
10
+ # * Setting variables and whatnot (+SET ...+).
11
+ # * Registering your own UDFs.
12
+ # * Specifying paths to jars to make available within the session's
13
+ # classpath (+HIVE_AUX_JARS_PATH+ env. var.).
14
+ #
15
+ # All of these things can be done through the use of initialization
16
+ # files (via hive's +-i+ option), except for the auxiliary jar libs
17
+ # environment variable (which is.... wait for it... in the environment).
18
+ #
19
+ # This class provides an abstraction to enable the following:
20
+ # * lay your files out according to its expectations
21
+ # * wrap that layout with an instance of this class
22
+ # * it'll give an interface for accessing initialization files (#hivercs)
23
+ # that make the stuff available in a hive session
24
+ # * it'll dynamically assemble the initialization file necessary to
25
+ # ensure appropriate assets are made available in the session
26
+ # * if you provide your own initialization file in the expected place,
27
+ # it'll ensure that the dynamic stuff is applied _first_ and the static
28
+ # one second, such that your static one can assume the neighboring
29
+ # assets are already in the session.
30
+ # * it'll give you a list of jars to make available as auxiliary_jars in the
31
+ # session based on contents of +aux-jars+.
32
+ #
33
+ # You lay it down, the object makes sense of it, nothing other than
34
+ # file organization required.
35
+ #
36
+ # = Filesystem Layout
37
+ #
38
+ # Suppose you have the following stuff (denoting symlinks with +->+):
39
+ #
40
+ # /etc/foo/
41
+ # an.archive.tar.gz
42
+ # another.archive.tar.gz
43
+ # aux-jars/
44
+ # aux-only.jar
45
+ # ignored.archive.tar.gz
46
+ # ignored.file.txt
47
+ # jarry.jar -> ../jarry.jar
48
+ # dist-only.jar
49
+ # hiverc
50
+ # jarry.jar
51
+ # textie.txt
52
+ # yummy.yaml
53
+ #
54
+ # Now you create an instance:
55
+ #
56
+ # ext = Hadupils::Extensions::Hive.new('/etc/foo')
57
+ #
58
+ # You could get the hive command-line options for using this stuff
59
+ # via:
60
+ #
61
+ # ext.hivercs
62
+ #
63
+ # It'll give you objects for two initialization files:
64
+ # 1. A dynamic one that has the appropriate commands for adding
65
+ # +an.archive.tar.gz+, +another.archive.tar.gz+, +dist-only.jar+,
66
+ # +jarry.jar+, +textie.txt+, and +yummy.yaml+ to the session.
67
+ # 2. The +hiverc+ one that's in there.
68
+ #
69
+ # And, the +ext.auxiliary_jars+ accessor will return a list of paths to
70
+ # the jars (_only_ the jars) contained within the +aux-jars+ path;
71
+ # a caller to hive would use this to construct the +HIVE_AUX_JARS_PATH+
72
+ # variable.
73
+ #
74
+ # Notice that +jarry.jar+ is common to the distributed usage (it'll be
75
+ # added to the session and associated distributed cache) and to the
76
+ # auxiliary path. That's because it appears in the main directory and
77
+ # in the +aux-jars+ subdirectory. There's nothing magical about the
78
+ # use of a symlink; that just saves disk space. 10 MB ought be enough
79
+ # for anyone.
80
+ #
81
+ # If there was no +hiverc+ file, then you would only get the
82
+ # initialization file object for the loading of assets in the main
83
+ # directory. Conversely, if there were no such assets, but there was
84
+ # a +hiverc+ file, you would get only the object for that file. If
85
+ # neither were present, #hivercs would be an empty list.
86
+ #
87
+ # If there is no +aux-jars+ directory, or that directory has no jars,
88
+ # the +ext.auxiliary_jars+ would be an empty list. Only jars will be included
89
+ # in that list; files without a +.jar+ extension will be ignored.
90
+ #
91
+ class Hive
92
+ module AuxJarsPath
93
+ # A string representation of the hive auxiliary jars paths,
94
+ # based on #auxiliary_jars, suitable for usage as the value
95
+ # of +HIVE_AUX_JARS_PATH+ within the environment.
96
+ def hive_aux_jars_path
97
+ auxiliary_jars.collect {|jar| jar.strip}.join(',')
98
+ end
99
+ end
100
+
101
+ include AuxJarsPath
102
+
103
+ AUX_PATH = 'aux-jars'
104
+ HIVERC_PATH = 'hiverc'
105
+
106
+ attr_reader :auxiliary_jars
107
+ attr_reader :path
108
+
109
+ def initialize(path)
110
+ @path = ::File.expand_path(path)
111
+ @auxiliary_jars = self.class.find_auxiliary_jars(@path)
112
+ @dynamic_ext = self.class.assemble_dynamic_extension(@path)
113
+ @static_ext = self.class.assemble_static_extension(@path)
114
+ end
115
+
116
+ # An array of hive initialization objects derived from
117
+ # dynamic and static sets. May be an empty list. Dynamic
118
+ # ones are guaranteed to come before static ones, so a static +hiverc+ can
119
+ # count on the other assets being available.
120
+ def hivercs
121
+ dynamic_hivercs + static_hivercs
122
+ end
123
+
124
+ # An array of dynamic, managed hive initialization objects
125
+ # (Hadupils::Extensions::HiveRC::Dynamic) based on the assets
126
+ # found within the #path. May be an empty list.
127
+ def dynamic_hivercs
128
+ if @dynamic_ext.assets.length > 0
129
+ @dynamic_ext.hivercs
130
+ else
131
+ []
132
+ end
133
+ end
134
+
135
+ # An array of static hive initialization objects
136
+ # (Hadupils::Extensions::HiveRC::Static) based on the presence
137
+ # of a +hiverc+ file within the #path. May be an empty list.
138
+ def static_hivercs
139
+ @static_ext.hivercs
140
+ end
141
+
142
+ def self.find_auxiliary_jars(path)
143
+ target = ::File.join(path, AUX_PATH)
144
+ if ::File.directory? target
145
+ jars = Hadupils::Assets.assets_in(target).find_all do |asset|
146
+ asset.kind_of? Hadupils::Assets::Jar
147
+ end
148
+ jars.collect {|asset| asset.path}
149
+ else
150
+ []
151
+ end
152
+ end
153
+
154
+ def self.assemble_dynamic_extension(path)
155
+ Flat.new(path) do
156
+ assets do |list|
157
+ list.reject {|asset| [AUX_PATH, HIVERC_PATH].include? asset.name }
158
+ end
159
+ end
160
+ end
161
+
162
+ def self.assemble_static_extension(path)
163
+ Static.new(path)
164
+ end
165
+ end
166
+
167
+ # Collection class for filesystem-based Hive extensions
168
+ #
169
+ # Pretty simple:
170
+ # * Given a #path in the filesystem
171
+ # * Scan that path for subdirectories
172
+ # * Wrap each subdirectory with Hadupils::Extensions::Hive.
173
+ # * Aggregate their hivercs and their auxiliary jars
174
+ #
175
+ # See the Hadupils::Extensions::Hive class docs to understand
176
+ # the expectations per subdirectory. The #path provided to
177
+ # HiveSet should be a directory that contains subdirectories conforming
178
+ # to Hadupils::Extensions::Hive conventions.
179
+ #
180
+ # All other files in the #path will be ignored; only subdirectories will
181
+ # be considered.
182
+ #
183
+ # == Member Extensions
184
+ #
185
+ # The Array of Hadupils::Extensions::Hive instances derived from
186
+ # #path's subdirectories will be available via the #members attribute
187
+ # reader.
188
+ #
189
+ # The order of members matches the lexicographic order of their
190
+ # respective subdirectory basenames within #path.
191
+ #
192
+ # The order of #hivercs and the order of #auxiliary_jars will follow
193
+ # the order of the respective #members. All of member 0's #hivercs,
194
+ # followed by all of member 1's #hivercs, and so on.
195
+ #
196
+ # Thus the order of things is deterministic, according to lexicographic
197
+ # ordering of stuff in the filesystem. You control it in how you
198
+ # lay stuff out.
199
+ #
200
+ # == Good Advice
201
+ #
202
+ # Don't do anything stupid.
203
+ #
204
+ class HiveSet
205
+ include Hive::AuxJarsPath
206
+
207
+ attr_reader :path
208
+ attr_reader :members
209
+
210
+ def initialize(path)
211
+ @path = ::File.expand_path(path)
212
+ @members = self.class.gather_member_extensions(@path)
213
+ end
214
+
215
+ def self.gather_member_extensions(path)
216
+ ::Dir.entries(path).sort.inject([]) do |result, entry|
217
+ full_path = ::File.join(path, entry)
218
+ if entry != '.' and entry != '..' and ::File.directory?(full_path)
219
+ result << Hive.new(full_path)
220
+ else
221
+ result
222
+ end
223
+ end
224
+ end
225
+
226
+ # The cumulative Array of hive initialization file objects
227
+ # across all #members, in member order.
228
+ def hivercs
229
+ members_inject {|member| member.hivercs}
230
+ end
231
+
232
+ # The cumulative Array of #auxiliary_jars across all #members,
233
+ # in member order.
234
+ def auxiliary_jars
235
+ members_inject {|member| member.auxiliary_jars}
236
+ end
237
+
238
+ # Accumulate a list based on an operation (given in a block)
239
+ # per member. Accumulation is done against a starting empty list
240
+ # with the addition operator, not through appending. Therefore,
241
+ # the block needs to provide an array, not an arbitrary object.
242
+ def members_inject
243
+ @members.inject [] do |result, member|
244
+ increment = yield member
245
+ result + increment
246
+ end
247
+ end
248
+ end
249
+ end
@@ -160,3 +160,5 @@ module Hadupils::Extensions
160
160
  end
161
161
  end
162
162
  end
163
+
164
+ require 'hadupils/extensions/hive'
@@ -8,8 +8,32 @@ module Hadupils::Runners
8
8
 
9
9
  def command; end
10
10
 
11
+ def execute!
12
+ command_list = command
13
+ if RUBY_VERSION < '1.9' and command_list[0].kind_of? Hash
14
+ deletes = []
15
+ overrides = {}
16
+ begin
17
+ command_list[0].each do |key, val|
18
+ if ::ENV.has_key? key
19
+ overrides[key] = ::ENV[key]
20
+ else
21
+ deletes << key
22
+ end
23
+ ::ENV[key] = val
24
+ end
25
+ Kernel.system(*command_list[1..-1])
26
+ ensure
27
+ overrides.each {|key, val| ::ENV[key] = val }
28
+ deletes.each {|key| ::ENV.delete key }
29
+ end
30
+ else
31
+ Kernel.system(*command_list)
32
+ end
33
+ end
34
+
11
35
  def wait!
12
- @last_result = Kernel.system(*command)
36
+ @last_result = execute!
13
37
  @last_status = $?
14
38
  if @last_result.nil?
15
39
  255
@@ -18,12 +42,17 @@ module Hadupils::Runners
18
42
  end
19
43
  end
20
44
 
21
- def self.run(params)
22
- self.new(params).wait!
45
+ def self.run(*params)
46
+ self.new(*params).wait!
23
47
  end
24
48
  end
25
49
 
26
50
  class Hive < Base
51
+ def initialize(params, hive_aux_jars_path='')
52
+ super(params)
53
+ @hive_aux_jars_path = hive_aux_jars_path
54
+ end
55
+
27
56
  def self.base_runner
28
57
  @base_runner || ::File.join(ENV['HIVE_HOME'], 'bin', 'hive')
29
58
  end
@@ -33,7 +62,7 @@ module Hadupils::Runners
33
62
  end
34
63
 
35
64
  def command
36
- items = params.inject([self.class.base_runner]) do |result, param|
65
+ params.inject([env_overrides, self.class.base_runner]) do |result, param|
37
66
  if param.respond_to? :hive_opts
38
67
  result + param.hive_opts
39
68
  else
@@ -41,5 +70,16 @@ module Hadupils::Runners
41
70
  end
42
71
  end
43
72
  end
73
+
74
+ def env_overrides
75
+ e = {}
76
+ settings = [@hive_aux_jars_path, ::ENV['HIVE_AUX_JARS_PATH']].reject do |val|
77
+ val.nil? or val.strip == ''
78
+ end
79
+ if settings.length > 0
80
+ e['HIVE_AUX_JARS_PATH'] = settings.join(',')
81
+ end
82
+ e
83
+ end
44
84
  end
45
85
  end
@@ -46,4 +46,22 @@ module Hadupils::Search
46
46
  def self.hadoop_assets
47
47
  find_from_pwd(hadoop_assets_name)
48
48
  end
49
+
50
+ # The basename to use when looking for hive extensions from pwd.
51
+ def self.hive_extensions_name
52
+ @hive_extensions_name || 'hive-ext'
53
+ end
54
+
55
+ # Set the basename to use when looking for hive extensions from pwd.
56
+ def self.hive_extensions_name=(basename)
57
+ @hive_extensions_name = basename
58
+ end
59
+
60
+ # A search for #hive_extensions_name from the pwd.
61
+ # The default behavior is to look for a subdir named +hive-ext+,
62
+ # starting from the current working directory and walking upwards until
63
+ # a match is found or the file system root is encountered.
64
+ def self.hive_extensions
65
+ find_from_pwd(hive_extensions_name)
66
+ end
49
67
  end
@@ -47,4 +47,16 @@ class Test::Unit::TestCase
47
47
  instance_eval &block
48
48
  end
49
49
  end
50
+
51
+ # Lets us define shared bits of shoulda context (setup blocks, tests,
52
+ # subcontexts, etc.) in a declarative manner; installs a singleton method
53
+ # :name into the calling class, that when invoked will eval the given
54
+ # block in the current Shoulda::Context::Context.
55
+ # You can then simply call :name in any arbitrary context in order to
56
+ # make use of the shared stuff within that context.
57
+ def self.shared_context(name, &block)
58
+ define_singleton_method name do
59
+ Shoulda::Context.current_context.instance_eval &block
60
+ end
61
+ end
50
62
  end
@@ -63,28 +63,47 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
63
63
  cmd.user_config
64
64
  end
65
65
 
66
+ should 'have a HiveSet extension based on search for hive-ext' do
67
+ Hadupils::Search.expects(:hive_extensions).with.returns(path = mock())
68
+ Hadupils::Extensions::HiveSet.expects(:new).with(path).returns(extension = mock)
69
+ cmd = @klass.new
70
+ assert_equal extension, cmd.hive_ext
71
+ # Fails on expectations if previous result wasn't cached.
72
+ cmd.hive_ext
73
+ end
74
+
66
75
  context '#run' do
67
76
  setup do
68
77
  @command = @klass.new
69
78
  @command.stubs(:user_config).with.returns(@user_config = mock())
70
79
  @command.stubs(:hadoop_ext).with.returns(@hadoop_ext = mock())
80
+ @command.stubs(:hive_ext).with.returns(@hive_ext = mock)
71
81
  @runner_class = Hadupils::Runners::Hive
72
82
  end
73
83
 
74
- context 'with user config and hadoop asssets hivercs' do
84
+ context 'with user config, hadoop assets, hive ext hivercs and aux jars' do
75
85
  setup do
76
86
  @user_config.stubs(:hivercs).returns(@user_config_hivercs = [mock(), mock()])
77
87
  @hadoop_ext.stubs(:hivercs).returns(@hadoop_ext_hivercs = [mock(), mock(), mock()])
88
+ @hive_ext.stubs(:hivercs).returns(@hive_ext_hivercs = [mock, mock, mock])
89
+ @hive_ext.stubs(:hive_aux_jars_path).returns(@hive_aux_jars_path = mock.to_s)
78
90
  end
79
91
 
80
92
  should 'apply hiverc options to hive runner call' do
81
- @runner_class.expects(:run).with(@user_config_hivercs + @hadoop_ext_hivercs).returns(result = mock())
93
+ @runner_class.expects(:run).with(@user_config_hivercs +
94
+ @hadoop_ext_hivercs +
95
+ @hive_ext_hivercs,
96
+ @hive_aux_jars_path).returns(result = mock())
82
97
  assert_equal result, @command.run([])
83
98
  end
84
99
 
85
100
  should 'prepend hiverc options before given params to hive runner call' do
86
101
  params = [mock(), mock()]
87
- @runner_class.expects(:run).with(@user_config_hivercs + @hadoop_ext_hivercs + params).returns(result = mock())
102
+ @runner_class.expects(:run).with(@user_config_hivercs +
103
+ @hadoop_ext_hivercs +
104
+ @hive_ext_hivercs +
105
+ params,
106
+ @hive_aux_jars_path).returns(result = mock())
88
107
  assert_equal result, @command.run(params)
89
108
  end
90
109
  end
@@ -93,15 +112,17 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
93
112
  setup do
94
113
  @user_config.stubs(:hivercs).returns([])
95
114
  @hadoop_ext.stubs(:hivercs).returns([])
115
+ @hive_ext.stubs(:hivercs).returns([])
116
+ @hive_ext.stubs(:hive_aux_jars_path).returns('')
96
117
  end
97
118
 
98
- should 'pass params unchanged through to hive runner call' do
99
- @runner_class.expects(:run).with(params = [mock(), mock()]).returns(result = mock())
119
+ should 'pass params unchanged through to hive runner call along with aux jars path' do
120
+ @runner_class.expects(:run).with(params = [mock(), mock()], '').returns(result = mock())
100
121
  assert_equal result, @command.run(params)
101
122
  end
102
123
 
103
124
  should 'handle empty params' do
104
- @runner_class.expects(:run).with([]).returns(result = mock())
125
+ @runner_class.expects(:run).with([], '').returns(result = mock())
105
126
  assert_equal result, @command.run([])
106
127
  end
107
128
  end
@@ -111,8 +132,11 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
111
132
  setup do
112
133
  @conf = ::File.join(@tempdir.path, 'conf')
113
134
  @ext = ::File.join(@tempdir.path, 'hadoop-ext')
135
+ @hive_ext = @tempdir.full_path('hive-ext')
136
+
114
137
  ::Dir.mkdir(@conf)
115
138
  ::Dir.mkdir(@ext)
139
+ ::Dir.mkdir(@hive_ext)
116
140
  @hiverc = @tempdir.file(File.join('conf', 'hiverc')) do |f|
117
141
  f.write(@static_hiverc_content = 'my static content;')
118
142
  f.path
@@ -124,21 +148,81 @@ class Hadupils::CommandsTest < Test::Unit::TestCase
124
148
  @dynamic_hiverc_content = ["ADD FILE #{@ext_file}",
125
149
  "ADD JAR #{@ext_jar}",
126
150
  "ADD ARCHIVE #{@ext_tar}"].join(";\n") + ";\n"
151
+
152
+ # Assemble two entries under hive-ext
153
+ @hive_exts = %w(one two).inject({}) do |result, name|
154
+ state = result[name.to_sym] = {}
155
+ state[:path] = ::File.join(@hive_ext, name)
156
+
157
+ ::Dir.mkdir(state[:path])
158
+ state[:static_hiverc] = ::File.open(::File.join(state[:path], 'hiverc'), 'w') do |file|
159
+ file.write(state[:static_hiverc_content] = "#{name} static content")
160
+ file.path
161
+ end
162
+
163
+ assets = state[:assets] = %w(a.tar.gz b.txt c.jar).collect do |base|
164
+ ::File.open(::File.join(state[:path], "#{name}-#{base}"), 'w') do |file|
165
+ file.path
166
+ end
167
+ end
168
+
169
+ state[:dynamic_hiverc_content] = ["ADD ARCHIVE #{assets[0]};",
170
+ "ADD FILE #{assets[1]};",
171
+ "ADD JAR #{assets[2]};"].join("\n") + "\n"
172
+
173
+ aux_path = state[:aux_path] = ::File.join(state[:path], 'aux-jars')
174
+ ::Dir.mkdir(aux_path)
175
+ state[:aux_jars] = %w(boo foo).collect do |base|
176
+ ::File.open(::File.join(aux_path, "#{name}-#{base}.jar"), 'w') do |file|
177
+ file.path
178
+ end
179
+ end
180
+
181
+ state[:hive_aux_jars_path] = state[:aux_jars].join(',')
182
+
183
+ result
184
+ end
185
+
186
+ # Can't use a simple stub for this because other things are
187
+ # checked within ENV. Use a teardown to reset to its original state.
188
+ @orig_hive_aux_jars_path = ENV['HIVE_AUX_JARS_PATH']
189
+ ::ENV['HIVE_AUX_JARS_PATH'] = env_aux = mock.to_s
190
+ @hive_aux_jars_path_val = [@hive_exts[:one][:hive_aux_jars_path],
191
+ @hive_exts[:two][:hive_aux_jars_path],
192
+ env_aux].join(',')
193
+
127
194
  @pwd = ::Dir.pwd
128
195
  Hadupils::Search.stubs(:user_config).with.returns(@conf)
129
196
  Hadupils::Runners::Hive.stubs(:base_runner).with.returns(@hive_prog = '/opt/hive/bin/hive')
130
197
  ::Dir.chdir @tempdir.path
131
198
  end
132
199
 
200
+ teardown do
201
+ if @orig_hive_aux_jars_path
202
+ ENV['HIVE_AUX_JARS_PATH'] = @orig_hive_aux_jars_path
203
+ else
204
+ ENV.delete 'HIVE_AUX_JARS_PATH'
205
+ end
206
+ end
207
+
133
208
  should 'produce a valid set of parameters and hivercs' do
134
209
  Kernel.stubs(:system).with() do |*args|
135
- args[0] == @hive_prog and
136
- args[1] == '-i' and
137
- File.open(args[2], 'r').read == @static_hiverc_content and
138
- args[3] == '-i' and
139
- File.open(args[4], 'r').read == @dynamic_hiverc_content and
140
- args[5] == '--hiveconf' and
141
- args[6] == 'my.foo=your.fu'
210
+ args[0] == {'HIVE_AUX_JARS_PATH' => @hive_aux_jars_path_val} and
211
+ args[1] == @hive_prog and
212
+ args[2] == '-i' and
213
+ File.open(args[3], 'r').read == @static_hiverc_content and
214
+ args[4] == '-i' and
215
+ File.open(args[5], 'r').read == @dynamic_hiverc_content and
216
+ args[6] == '-i' and
217
+ File.open(args[7], 'r').read == @hive_exts[:one][:dynamic_hiverc_content] and
218
+ args[8] == '-i' and
219
+ File.open(args[9], 'r').read == @hive_exts[:one][:static_hiverc_content] and
220
+ args[10] == '-i' and
221
+ File.open(args[11], 'r').read == @hive_exts[:two][:dynamic_hiverc_content] and
222
+ args[12] == '-i' and
223
+ File.open(args[13], 'r').read == @hive_exts[:two][:static_hiverc_content] and
224
+ args[14] == '--hiveconf' and
225
+ args[15] == 'my.foo=your.fu'
142
226
  end
143
227
  Hadupils::Commands.run 'hive', ['--hiveconf', 'my.foo=your.fu']
144
228
  end
@@ -0,0 +1,257 @@
1
+ class Hadupils::Extensions::HiveTest < Test::Unit::TestCase
2
+ shared_context :provide_hive_ext do
3
+ setup do
4
+ @ext = Hadupils::Extensions::Hive.new(@tempdir.path)
5
+ end
6
+ end
7
+
8
+ shared_context :hive_aux_jars_path_cases do
9
+ should 'assemble hive_aux_jars_path from auxiliary_jars appropriately for use with HIVE_AUX_JARS_PATH env' do
10
+ jars = [mock, mock, mock, mock].collect {|m| m.to_s}
11
+ @ext.expects(:auxiliary_jars).with.returns(jars.collect {|j| " #{j} "})
12
+ # Verifying whitespace trimming, as part of it.
13
+ assert_equal jars.join(','), @ext.hive_aux_jars_path
14
+ end
15
+ end
16
+
17
+ shared_context :empty_hiverc_cases do
18
+ should 'have an empty hivercs list' do
19
+ assert_equal [], @ext.hivercs
20
+ end
21
+ end
22
+
23
+ shared_context :empty_auxiliary_cases do
24
+ should 'have an empty auxiliary jars list' do
25
+ assert_equal [], @ext.auxiliary_jars
26
+ end
27
+ end
28
+
29
+ shared_context :valid_auxiliary_cases do
30
+ should 'provide jars within aux-jars as the auxiliary_jars list' do
31
+ assert_equal @aux_jars, @ext.auxiliary_jars
32
+ end
33
+ end
34
+
35
+ shared_context :static_hiverc_cases do
36
+ should 'have the hiverc path for the final entry in hivercs' do
37
+ assert_equal @hiverc_file, @ext.hivercs[-1].hive_opts[1]
38
+ end
39
+
40
+ should 'have a static hiverc resource' do
41
+ assert_equal '-i', @ext.hivercs[-1].hive_opts[0]
42
+ end
43
+ end
44
+
45
+ shared_context :single_hiverc_cases do
46
+ should 'have only a single hiverc in the hivercs list' do
47
+ assert_equal 1, @ext.hivercs.length
48
+ end
49
+ end
50
+
51
+ shared_context :dynamic_hiverc_cases do
52
+ should 'have a hiverc with appropriate asset-oriented commands as the first entry in the hivercs' do
53
+ File.open(@ext.hivercs[0].hive_opts[1], 'r') do |f|
54
+ assert_equal @asset_commands, f.read
55
+ end
56
+ end
57
+
58
+ should 'have a dynamic hiverc resource' do
59
+ assert_equal '-i', @ext.hivercs[0].hive_opts[0]
60
+ end
61
+ end
62
+
63
+ shared_context :has_auxiliary_path do
64
+ setup do
65
+ @aux = @tempdir.full_path('aux-jars')
66
+ ::Dir.mkdir(@aux)
67
+ end
68
+ end
69
+
70
+ shared_context :has_auxiliary_jars do
71
+ setup do
72
+ if @aux.nil?
73
+ @aux = @tempdir.full_path('aux-jars')
74
+ ::Dir.mkdir(@aux)
75
+ end
76
+ @aux_jars = %w(a b c).collect do |base|
77
+ f = @tempdir.file(::File.join('aux-jars', "#{base}.jar"))
78
+ f.close
79
+ f.path
80
+ end
81
+ %w(tar.gz txt yaml).each do |extension|
82
+ @tempdir.file(::File.join('aux-jars', "bogus.#{extension}"))
83
+ end
84
+ end
85
+ end
86
+
87
+ shared_context :has_hiverc_file do
88
+ setup do
89
+ f = @tempdir.file('hiverc')
90
+ @hiverc_file = f.path
91
+ f.close
92
+ end
93
+ end
94
+
95
+ shared_context :has_assets do
96
+ setup do
97
+ @assets = %w{a.archive.tar.gz a.file.txt a.jar}.collect do |asset|
98
+ f = @tempdir.file(asset)
99
+ f.close
100
+ f.path
101
+ end
102
+
103
+ @asset_commands = "ADD ARCHIVE #{@assets[0]};\n" +
104
+ "ADD FILE #{@assets[1]};\n" +
105
+ "ADD JAR #{@assets[2]};\n"
106
+ end
107
+ end
108
+
109
+ tempdir_context Hadupils::Extensions::Hive do
110
+ context 'with auxiliary jars' do
111
+ provide_hive_ext
112
+ hive_aux_jars_path_cases
113
+ end
114
+
115
+ context 'given an empty directory' do
116
+ provide_hive_ext
117
+ empty_hiverc_cases
118
+ empty_auxiliary_cases
119
+ end
120
+
121
+ context 'given an empty aux-jars directory' do
122
+ has_auxiliary_path
123
+ provide_hive_ext
124
+ empty_hiverc_cases
125
+ empty_auxiliary_cases
126
+ end
127
+
128
+ context 'given a hiverc file' do
129
+ has_hiverc_file
130
+ provide_hive_ext
131
+ empty_auxiliary_cases
132
+ static_hiverc_cases
133
+ single_hiverc_cases
134
+ end
135
+
136
+ context 'given assets' do
137
+ has_assets
138
+
139
+ context 'and nothing else' do
140
+ provide_hive_ext
141
+ empty_auxiliary_cases
142
+ dynamic_hiverc_cases
143
+ single_hiverc_cases
144
+ end
145
+
146
+ context 'and a hiverc file' do
147
+ has_hiverc_file
148
+ provide_hive_ext
149
+ empty_auxiliary_cases
150
+ dynamic_hiverc_cases
151
+ static_hiverc_cases
152
+ end
153
+ end
154
+
155
+ context 'given a directory with an aux-jars directory and jars' do
156
+ has_auxiliary_jars
157
+
158
+ context 'and nothing else' do
159
+ provide_hive_ext
160
+ empty_hiverc_cases
161
+ valid_auxiliary_cases
162
+ end
163
+
164
+ context 'and assets' do
165
+ has_assets
166
+ provide_hive_ext
167
+ valid_auxiliary_cases
168
+ dynamic_hiverc_cases
169
+ single_hiverc_cases
170
+ end
171
+
172
+ context 'and a hiverc file' do
173
+ has_hiverc_file
174
+
175
+ context 'and no assets' do
176
+ provide_hive_ext
177
+ static_hiverc_cases
178
+ valid_auxiliary_cases
179
+ single_hiverc_cases
180
+ end
181
+
182
+ context 'and assets' do
183
+ has_assets
184
+ provide_hive_ext
185
+ dynamic_hiverc_cases
186
+ static_hiverc_cases
187
+ valid_auxiliary_cases
188
+ end
189
+ end
190
+ end
191
+ end
192
+
193
+ tempdir_context Hadupils::Extensions::HiveSet do
194
+ setup do
195
+ @cls = Hadupils::Extensions::HiveSet
196
+ end
197
+
198
+ should 'have the dir path expanded as :path' do
199
+ # Making the path relative demonstrates path expansion
200
+ ::Dir.chdir(::File.dirname(@tempdir.path)) do
201
+ assert_equal @tempdir.path,
202
+ @cls.new(::File.basename(@tempdir.path)).path
203
+ end
204
+ end
205
+
206
+ should 'produce a Hadupils::Extensions::Hive per subdirectory' do
207
+ ::Dir.mkdir(a = @tempdir.full_path('aye'))
208
+ ::Dir.mkdir(b = @tempdir.full_path('bee'))
209
+ ::Dir.mkdir(c = @tempdir.full_path('si'))
210
+
211
+ # These should be ignored 'cause they ain't dirs
212
+ @tempdir.file('foo.txt')
213
+ @tempdir.file('blah.jar')
214
+ @tempdir.file('garbage.tar.gz')
215
+
216
+ expect = [a, b, c].collect {|path| [Hadupils::Extensions::Hive, path]}
217
+ ext = @cls.new(@tempdir.path)
218
+ assert_equal expect,
219
+ ext.members.collect {|member| [member.class, member.path]}
220
+ end
221
+
222
+ context 'with members' do
223
+ setup do
224
+ @member_a = mock
225
+ @member_b = mock
226
+ @cls.expects(:gather_member_extensions).with(@tempdir.path).returns(@members = [@member_a, @member_b])
227
+ @ext = @cls.new @tempdir.path
228
+ end
229
+
230
+ hive_aux_jars_path_cases
231
+
232
+ should 'base members list on :gather_member_extensions' do
233
+ assert_equal @members, @ext.members
234
+ end
235
+
236
+ should 'produce the cumulative hivercs list from its members' do
237
+ hivercs = @members.inject([]) do |expect, member|
238
+ this_pair = [mock, mock]
239
+ member.expects(:hivercs).with.returns(this_pair)
240
+ expect + this_pair
241
+ end
242
+
243
+ assert_equal hivercs, @ext.hivercs
244
+ end
245
+
246
+ should 'produce the cumulative auxiliary_jars list from its members' do
247
+ jarries = @members.inject([]) do |expect, member|
248
+ jars = [mock, mock, mock].collect {|jar| jar.to_s}
249
+ member.expects(:auxiliary_jars).with.returns(jars)
250
+ expect + jars
251
+ end
252
+
253
+ assert_equal jarries, @ext.auxiliary_jars
254
+ end
255
+ end
256
+ end
257
+ end
@@ -11,26 +11,76 @@ class Hadupils::RunnersTest < Test::Unit::TestCase
11
11
  context 'wait!' do
12
12
  setup do
13
13
  @command = [mock(), mock(), mock()]
14
- @runner.expects(:command).with.returns(@command)
15
14
  # This will ensure that $? is non-nil
16
15
  system(RbConfig.ruby, '-v')
17
16
  end
18
17
 
19
- should 'assemble system call via command method' do
20
- Kernel.expects(:system).with(*@command).returns(true)
21
- $?.stubs(:exitstatus).with.returns(mock())
22
- @runner.wait!
23
- end
18
+ context 'with semi-modern ruby' do
19
+ setup do
20
+ @runner.expects(:command).with.returns(@command)
21
+ end
22
+
23
+ should 'assemble system call via command method' do
24
+ Kernel.expects(:system).with(*@command).returns(true)
25
+ $?.stubs(:exitstatus).with.returns(mock())
26
+ @runner.wait!
27
+ end
24
28
 
25
- should 'return 255 when system returns nil' do
26
- Kernel.stubs(:system).returns(nil)
27
- assert_equal 255, @runner.wait!
29
+ should 'return 255 when system returns nil' do
30
+ Kernel.stubs(:system).returns(nil)
31
+ assert_equal 255, @runner.wait!
32
+ end
33
+
34
+ should 'return Process::Status#exitstatus when non-nil system result' do
35
+ Kernel.stubs(:system).returns(true)
36
+ $?.stubs(:exitstatus).with.returns(status = mock())
37
+ assert_equal status, @runner.wait!
38
+ end
28
39
  end
29
40
 
30
- should 'return Process::Status#exitstatus when non-nil system result' do
31
- Kernel.stubs(:system).returns(true)
32
- $?.stubs(:exitstatus).with.returns(status = mock())
33
- assert_equal status, @runner.wait!
41
+ context 'with ruby pre 1.9' do
42
+ setup do
43
+ @orig_ruby_version = ::RUBY_VERSION
44
+ ::RUBY_VERSION = '1.8.7'
45
+ end
46
+
47
+ teardown do
48
+ ::RUBY_VERSION = @orig_ruby_version
49
+ end
50
+
51
+ should 'handle command without env hash normally' do
52
+ @runner.expects(:command).with.returns(@command)
53
+ Kernel.expects(:system).with(*@command).returns(true)
54
+ $?.stubs(:exitstatus).with.returns(mock)
55
+ @runner.wait!
56
+ end
57
+
58
+ should 'handle environment hash specially and restore env' do
59
+ # A defined environment variable to play with.
60
+ var = ::ENV.keys.find {|k| ENV[k].strip.length > 0}
61
+ orig = ::ENV[var]
62
+ to_be_removed = ::ENV.keys.sort[-1] + 'X'
63
+ removal_val = mock.to_s
64
+ replacement = "#{orig}-#{mock.to_s}"
65
+ @runner.expects(:command).with.returns([{var => replacement, to_be_removed => removal_val}] + @command)
66
+ $?.stubs(:exitstatus).with.returns(mock)
67
+ begin
68
+ # Environment variable is overridden during system call
69
+ matcher = Kernel.expects(:system).with do |*args|
70
+ args == @command and ::ENV[var] == replacement and ::ENV[to_be_removed] == removal_val
71
+ end
72
+
73
+ matcher.returns true
74
+
75
+ @runner.wait!
76
+
77
+ # But is restored afterward
78
+ assert_equal orig, ::ENV[var]
79
+ assert_equal false, ::ENV.has_key?(to_be_removed)
80
+ ensure
81
+ ::ENV[var] = orig
82
+ end
83
+ end
34
84
  end
35
85
  end
36
86
  end
@@ -56,12 +106,25 @@ class Hadupils::RunnersTest < Test::Unit::TestCase
56
106
  end
57
107
 
58
108
  should 'provide invocation for bare hive if given empty parameters' do
59
- assert_equal [@hive_path], @klass.new([]).command
109
+ assert_equal [{}, @hive_path], @klass.new([]).command
110
+ end
111
+
112
+ should 'provide invocation with aux jars and bare hive given empty params but aux jars path' do
113
+ ENV.stubs(:[]=).with('HIVE_AUX_JARS_PATH').returns(nil)
114
+ assert_equal [{'HIVE_AUX_JARS_PATH' => 'foo'}, @hive_path],
115
+ @klass.new([], 'foo').command
116
+ end
117
+
118
+ should 'provide invocation with merged aux jars given otherwise bare stuff' do
119
+ ::ENV.stubs(:[]).with('HIVE_AUX_JARS_PATH').returns(orig = mock.to_s)
120
+ additional = mock.to_s
121
+ assert_equal [{'HIVE_AUX_JARS_PATH' => "#{additional},#{orig}"}, @hive_path],
122
+ @klass.new([], additional).command
60
123
  end
61
124
 
62
125
  should 'provide invocation for hive with all given parameters' do
63
126
  params = [mock().to_s, mock().to_s, mock().to_s, mock().to_s]
64
- assert_equal [@hive_path] + params,
127
+ assert_equal [{}, @hive_path] + params,
65
128
  @klass.new(params).command
66
129
  end
67
130
 
@@ -72,7 +135,7 @@ class Hadupils::RunnersTest < Test::Unit::TestCase
72
135
  p2.expects(:hive_opts).with.returns(p2_opts = ['-i', mock().to_s])
73
136
  s1 = mock().to_s
74
137
  s2 = mock().to_s
75
- assert_equal [@hive_path, s1] + p1_opts + [s2] + p2_opts,
138
+ assert_equal [{}, @hive_path, s1] + p1_opts + [s2] + p2_opts,
76
139
  @klass.new([s1, p1, s2, p2]).command
77
140
  end
78
141
  end
@@ -80,4 +80,22 @@ class Hadupils::SearchTest < Test::Unit::TestCase
80
80
  assert_respond_to Hadupils::Search, :hadoop_assets_name=
81
81
  end
82
82
  end
83
+
84
+ context 'hive_extensions' do
85
+ should 'search for a directory specified by #hive_extensions_name' do
86
+ Hadupils::Search.expects(:hive_extensions_name).with.returns(name = mock.to_s)
87
+ Hadupils::Search.expects(:find_from_pwd).with(name).returns(dir = mock)
88
+ assert_equal dir, Hadupils::Search.hive_extensions
89
+ end
90
+ end
91
+
92
+ context 'hive_extensions_name' do
93
+ should 'default to "hive-ext"' do
94
+ assert_equal 'hive-ext', Hadupils::Search.hive_extensions_name
95
+ end
96
+
97
+ should 'be settable' do
98
+ assert_respond_to Hadupils::Search, :hive_extensions_name=
99
+ end
100
+ end
83
101
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hadupils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -85,14 +85,16 @@ files:
85
85
  - lib/hadupils/search.rb
86
86
  - lib/hadupils/commands.rb
87
87
  - lib/hadupils/runners.rb
88
+ - lib/hadupils/extensions/hive.rb
88
89
  - lib/hadupils/extensions.rb
89
90
  - lib/hadupils/assets.rb
90
91
  - lib/hadupils.rb
91
92
  - test/unit/assets_test.rb
92
93
  - test/unit/commands_test.rb
93
- - test/unit/extensions_test.rb
94
94
  - test/unit/runners_test.rb
95
95
  - test/unit/search_test.rb
96
+ - test/unit/extensions/base_test.rb
97
+ - test/unit/extensions/hive_test.rb
96
98
  - test/hadupil_test_setup.rb
97
99
  - bin/hadupils
98
100
  - Rakefile.rb