wukong-deploy 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +6 -1
- data/README.md +47 -0
- data/examples/.gitkeep +0 -0
- data/lib/wukong-deploy/configuration.rb +24 -0
- data/lib/wukong-deploy/driver.rb +1 -3
- data/lib/wukong-deploy/templater.rb +50 -11
- data/lib/wukong-deploy/templater/conflict_resolution.rb +57 -0
- data/lib/wukong-deploy/templater/differ.rb +90 -0
- data/lib/wukong-deploy/templater/messaging.rb +32 -0
- data/lib/wukong-deploy/version.rb +1 -1
- data/spec/spec_helper.rb +10 -0
- data/spec/support/integration_helper.rb +38 -0
- data/spec/wukong-deploy/wu_deploy_spec.rb +94 -0
- data/templates/README.md.erb +214 -1
- data/templates/config/application.rb.erb +0 -2
- data/wukong-deploy.gemspec +4 -6
- metadata +28 -18
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -38,6 +38,53 @@ The deploy pack is installed as a RubyGem:
|
|
38
38
|
$ sudo gem install wukong-deploy
|
39
39
|
```
|
40
40
|
|
41
|
+
## Usage
|
42
|
+
|
43
|
+
Wukong-Deploy provides a command-line tool `wu-deploy` which can be
|
44
|
+
used to create or interact with deploy packs.
|
45
|
+
|
46
|
+
### Creating a New Deploy Pack
|
47
|
+
|
48
|
+
Create a new deploy pack:
|
49
|
+
|
50
|
+
```
|
51
|
+
$ wu-deploy new my_app
|
52
|
+
Within /home/user/my_app:
|
53
|
+
create .
|
54
|
+
create app/models
|
55
|
+
create app/processors
|
56
|
+
...
|
57
|
+
```
|
58
|
+
|
59
|
+
This will create a directory `my_app` in the current directory.
|
60
|
+
Passing the `dry_run` option will print what should happen without
|
61
|
+
actually doing anything:
|
62
|
+
|
63
|
+
```
|
64
|
+
$ wu-deploy new my_app --dry_run
|
65
|
+
Within /home/user/my_app:
|
66
|
+
create .
|
67
|
+
create app/models
|
68
|
+
create app/processors
|
69
|
+
...
|
70
|
+
```
|
71
|
+
|
72
|
+
You'll be prompted if there is a conflict. You can pass the `force`
|
73
|
+
option to always overwrite files and the `skip` option to never
|
74
|
+
overwrite files.
|
75
|
+
|
76
|
+
### Working with an Existing Deploy Pack
|
77
|
+
|
78
|
+
If your current directory is within an existing deploy pack you can
|
79
|
+
start up an IRB console with the deploy pack's environment already
|
80
|
+
loaded:
|
81
|
+
|
82
|
+
```
|
83
|
+
$ wu-deploy console
|
84
|
+
irb(main):001:0>
|
85
|
+
```
|
86
|
+
|
87
|
+
|
41
88
|
## File Structure
|
42
89
|
|
43
90
|
A deploy pack is a repository with the following
|
data/examples/.gitkeep
ADDED
File without changes
|
@@ -12,6 +12,30 @@ module Wukong
|
|
12
12
|
case executable
|
13
13
|
when 'wu-hadoop'
|
14
14
|
Wukong::Elasticsearch.configure(settings) if executable == 'wu-hadoop'
|
15
|
+
when 'wu-deploy'
|
16
|
+
settings.define(:dry_run, :description => "Don't actually create or modify anything", :type => :boolean, :default => false)
|
17
|
+
settings.define(:skip, :description => "Skip existing files", :type => :boolean, :default => false)
|
18
|
+
settings.define(:force, :description => "Overwrite existing files", :type => :boolean, :default => false)
|
19
|
+
def settings.usage
|
20
|
+
"usage: wu-deploy ACTION [ --param=val | --param | -p val | -p ] ..."
|
21
|
+
end
|
22
|
+
settings.use(:commandline)
|
23
|
+
settings.description = <<EOF
|
24
|
+
wu-deploy is a tool for creating and interacting with deploy packs.
|
25
|
+
|
26
|
+
You can create a new deploy pack
|
27
|
+
|
28
|
+
$ wu-deploy new my_app
|
29
|
+
|
30
|
+
The `--force' and `--skip' options can be used to control how conflict
|
31
|
+
resolution works when creating files. The `--dry_run` option can be
|
32
|
+
used to see what happens without doing it.
|
33
|
+
|
34
|
+
If you are within the directory of a deploy pack, you can enter an IRB
|
35
|
+
console with the deploy pack's environment already loaded:
|
36
|
+
|
37
|
+
$ wu-deploy console
|
38
|
+
EOF
|
15
39
|
end
|
16
40
|
settings
|
17
41
|
end
|
data/lib/wukong-deploy/driver.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
1
|
module Wukong
|
4
2
|
module Deploy
|
5
3
|
class Driver
|
@@ -21,7 +19,7 @@ module Wukong
|
|
21
19
|
when 'new'
|
22
20
|
require_relative('templater')
|
23
21
|
raise Error.new("Must provide a path to the root of the deploy pack you want to create") if args[1].nil? || args[1].blank?
|
24
|
-
Templater.new(File.expand_path(args[1], Dir.pwd)).
|
22
|
+
Templater.new(File.expand_path(args[1], Dir.pwd), settings).run!
|
25
23
|
when 'console'
|
26
24
|
require_relative('console')
|
27
25
|
Wukong::Deploy::Console.new.run!
|
@@ -3,6 +3,8 @@ require 'fileutils'
|
|
3
3
|
require 'erubis'
|
4
4
|
|
5
5
|
require_relative('repo')
|
6
|
+
require_relative('templater/messaging')
|
7
|
+
require_relative('templater/conflict_resolution')
|
6
8
|
|
7
9
|
module Wukong
|
8
10
|
module Deploy
|
@@ -11,22 +13,35 @@ module Wukong
|
|
11
13
|
attr_accessor :repo
|
12
14
|
attr_accessor :options
|
13
15
|
|
14
|
-
include FileUtils
|
16
|
+
include FileUtils
|
17
|
+
include Messaging
|
18
|
+
include ConflictResolution
|
15
19
|
|
16
20
|
def initialize root, options={}
|
17
21
|
self.repo = Repo.new(root)
|
18
22
|
self.options = options
|
19
23
|
end
|
20
24
|
|
21
|
-
def
|
25
|
+
def dry_run?
|
26
|
+
@options[:dry_run]
|
27
|
+
end
|
28
|
+
|
29
|
+
def run!
|
30
|
+
if dry_run?
|
31
|
+
puts "Would perform the following actions in #{repo.root}"
|
32
|
+
else
|
33
|
+
puts "Within #{repo.root}:"
|
34
|
+
end
|
22
35
|
create_dirs
|
23
36
|
create_templates
|
24
37
|
create_gitkeeps
|
25
38
|
create_gitignore
|
26
39
|
end
|
27
|
-
|
40
|
+
|
28
41
|
def create_dirs
|
29
|
-
repo.dirs_to_create.each
|
42
|
+
repo.dirs_to_create.each do |dir|
|
43
|
+
create_directory(dir)
|
44
|
+
end
|
30
45
|
end
|
31
46
|
|
32
47
|
def create_templates
|
@@ -36,18 +51,16 @@ module Wukong
|
|
36
51
|
end
|
37
52
|
|
38
53
|
def create_template input_path, output_path, binding={}
|
39
|
-
input
|
40
|
-
erb
|
41
|
-
|
42
|
-
|
43
|
-
puts "#{action} #{output_path}"
|
44
|
-
File.open(output_path, 'w') { |f| f.puts(output) }
|
54
|
+
input = File.read(input_path)
|
55
|
+
erb = Erubis::Eruby.new(input)
|
56
|
+
content = erb.result(binding)
|
57
|
+
create_file(content, output_path)
|
45
58
|
end
|
46
59
|
|
47
60
|
def create_gitkeeps
|
48
61
|
repo.dirs_to_create.each do |dir|
|
49
62
|
if Dir[File.join(dir, '*')].empty?
|
50
|
-
|
63
|
+
create_file(empty_file, File.join(dir, '.gitkeep'))
|
51
64
|
end
|
52
65
|
end
|
53
66
|
end
|
@@ -59,7 +72,33 @@ module Wukong
|
|
59
72
|
def templates_dir
|
60
73
|
@templates_dir ||= Pathname.new(File.expand_path('../../../templates', __FILE__))
|
61
74
|
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
def empty_file
|
79
|
+
""
|
80
|
+
end
|
62
81
|
|
82
|
+
def create_file content, path
|
83
|
+
if File.exist?(path)
|
84
|
+
handle_conflict(content, path)
|
85
|
+
else
|
86
|
+
message_create(path)
|
87
|
+
write_file(content, path)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def create_directory(dir)
|
92
|
+
message_create(dir)
|
93
|
+
return if dry_run?
|
94
|
+
mkdir_p(dir)
|
95
|
+
end
|
96
|
+
|
97
|
+
def write_file content, path
|
98
|
+
return if dry_run?
|
99
|
+
File.open(path, 'w') { |f| f.write(content) }
|
100
|
+
end
|
101
|
+
|
63
102
|
end
|
64
103
|
end
|
65
104
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require_relative("differ")
|
2
|
+
|
3
|
+
module Wukong
|
4
|
+
module Deploy
|
5
|
+
module ConflictResolution
|
6
|
+
|
7
|
+
def handle_conflict(new_content, path)
|
8
|
+
existing_content = File.read(path)
|
9
|
+
case
|
10
|
+
when new_content == existing_content
|
11
|
+
message_same(path)
|
12
|
+
when always_replace?
|
13
|
+
message_replace(path)
|
14
|
+
write_file(new_content, path)
|
15
|
+
when never_replace?
|
16
|
+
message_skip(path)
|
17
|
+
else
|
18
|
+
message_conflict(path)
|
19
|
+
diff!(new_content, existing_content, path)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def always_replace?
|
24
|
+
@always_replace || options[:force]
|
25
|
+
end
|
26
|
+
|
27
|
+
def always_replace!
|
28
|
+
@always_replace = true
|
29
|
+
end
|
30
|
+
|
31
|
+
def never_replace?
|
32
|
+
@never_replace || options[:skip]
|
33
|
+
end
|
34
|
+
|
35
|
+
def never_replace!
|
36
|
+
@never_replace = true
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
# :nodoc:
|
42
|
+
def diff! new_content, existing_content, path
|
43
|
+
differ = Differ.new(new_content, existing_content)
|
44
|
+
differ.resolve!
|
45
|
+
always_replace! if differ.always_replace?
|
46
|
+
never_replace! if differ.never_replace?
|
47
|
+
if differ.replace?
|
48
|
+
message_replace(path)
|
49
|
+
write_file(new_content, path)
|
50
|
+
else
|
51
|
+
message_skip(path)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'diffy'
|
2
|
+
|
3
|
+
module Wukong
|
4
|
+
module Deploy
|
5
|
+
class Differ
|
6
|
+
|
7
|
+
attr_accessor :new_content
|
8
|
+
attr_accessor :existing_content
|
9
|
+
|
10
|
+
def initialize new_content, existing_content
|
11
|
+
self.new_content = new_content
|
12
|
+
self.existing_content = existing_content
|
13
|
+
end
|
14
|
+
|
15
|
+
def replace?
|
16
|
+
@replace
|
17
|
+
end
|
18
|
+
|
19
|
+
def always_replace?
|
20
|
+
@always_replace
|
21
|
+
end
|
22
|
+
|
23
|
+
def never_replace?
|
24
|
+
@never_replace
|
25
|
+
end
|
26
|
+
|
27
|
+
def resolve!
|
28
|
+
response = get_response
|
29
|
+
case response
|
30
|
+
when /^y/
|
31
|
+
@replace = true
|
32
|
+
when /^Y/
|
33
|
+
@replace = true
|
34
|
+
@always_replace = true
|
35
|
+
when /^n/
|
36
|
+
@replace = false
|
37
|
+
when /^N/
|
38
|
+
@replace = false
|
39
|
+
@never_replace = true
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def diff
|
44
|
+
@diff = Diffy::Diff.new(new_content, existing_content, :allow_empty_diff => true, :diff => "-U 5", :include_diff_info => true)
|
45
|
+
end
|
46
|
+
|
47
|
+
def show_diff
|
48
|
+
puts ''
|
49
|
+
diff.each do |line|
|
50
|
+
puts " #{line}"
|
51
|
+
end
|
52
|
+
puts ''
|
53
|
+
end
|
54
|
+
|
55
|
+
def show_help
|
56
|
+
puts " Y - yes, overwrite this file and all other conflicts"
|
57
|
+
puts " y - yes, overwrite this file"
|
58
|
+
puts " N - no, skip this file and all other conflicts"
|
59
|
+
puts " n - no, skip this file"
|
60
|
+
puts " d - diff, show the differences between the existing file and the new file"
|
61
|
+
puts " q - quit, abort"
|
62
|
+
puts " h - help, show this help"
|
63
|
+
end
|
64
|
+
|
65
|
+
def get_response
|
66
|
+
STDOUT.write ' Overwrite? (enter "h" for help) [YNynqdh]: '
|
67
|
+
begin
|
68
|
+
response = STDIN.readline.chomp.strip
|
69
|
+
rescue EOFError, Interrupt => e
|
70
|
+
exit(1)
|
71
|
+
end
|
72
|
+
case
|
73
|
+
when response =~ /^(y|n|Y|N)/
|
74
|
+
response
|
75
|
+
when response =~ /^q/i
|
76
|
+
exit(1)
|
77
|
+
when response =~ /^d/i
|
78
|
+
show_diff
|
79
|
+
get_response
|
80
|
+
when response =~ /^h/i
|
81
|
+
show_help
|
82
|
+
get_response
|
83
|
+
else
|
84
|
+
get_response
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
module Messaging
|
4
|
+
|
5
|
+
def message action, *objects
|
6
|
+
puts [action.rjust(25), ' ', objects.map { |path| Pathname.new(path).relative_path_from(repo.root).to_s }.join(' ')].join
|
7
|
+
end
|
8
|
+
|
9
|
+
def message_create *objects
|
10
|
+
message "\e[32m\e[1mcreate\e[0m", *objects
|
11
|
+
end
|
12
|
+
|
13
|
+
def message_conflict *objects
|
14
|
+
message "\e[31m\e[1mconflict\e[0m", *objects
|
15
|
+
end
|
16
|
+
|
17
|
+
def message_replace *objects
|
18
|
+
message "\e[31m\e[1mreplace\e[0m", *objects
|
19
|
+
end
|
20
|
+
|
21
|
+
def message_same *objects
|
22
|
+
message "\e[34m\e[1msame\e[0m", *objects
|
23
|
+
end
|
24
|
+
|
25
|
+
def message_skip *objects
|
26
|
+
message "\e[35m\e[1mskip\e[0m", *objects
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'rspec'
|
2
|
+
require 'wukong-deploy'
|
3
|
+
require 'wukong/spec_helpers'
|
4
|
+
require_relative './support/integration_helper'
|
5
|
+
|
6
|
+
RSpec.configure do |config|
|
7
|
+
config.mock_with :rspec
|
8
|
+
include Wukong::SpecHelpers
|
9
|
+
include Wukong::Deploy::IntegrationHelper
|
10
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
module IntegrationHelper
|
4
|
+
|
5
|
+
def root
|
6
|
+
@root ||= Pathname.new(File.expand_path('../../..', __FILE__))
|
7
|
+
end
|
8
|
+
|
9
|
+
def lib_dir *args
|
10
|
+
root.join('lib', *args)
|
11
|
+
end
|
12
|
+
|
13
|
+
def bin_dir *args
|
14
|
+
root.join('bin', *args)
|
15
|
+
end
|
16
|
+
|
17
|
+
def examples_dir *args
|
18
|
+
root.join('examples', *args)
|
19
|
+
end
|
20
|
+
|
21
|
+
def integration_env
|
22
|
+
{
|
23
|
+
"PATH" => [bin_dir.to_s, ENV["PATH"]].compact.join(':'),
|
24
|
+
"RUBYLIB" => [lib_dir.to_s, ENV["RUBYLIB"]].compact.join(':')
|
25
|
+
}
|
26
|
+
end
|
27
|
+
|
28
|
+
def integration_cwd
|
29
|
+
root.to_s
|
30
|
+
end
|
31
|
+
|
32
|
+
def example_script *args
|
33
|
+
examples_dir(*args)
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'wu-deploy' do
|
4
|
+
|
5
|
+
before {`rm -rf #{examples_dir('*')}` }
|
6
|
+
after {`rm -rf #{examples_dir('*')}` }
|
7
|
+
|
8
|
+
context "without arguments" do
|
9
|
+
subject { command('wu-deploy') }
|
10
|
+
it { should exit_with(:non_zero) }
|
11
|
+
it "displays a help message" do
|
12
|
+
should have_stderr(/usage: wu-deploy/)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
context "creating a deploy pack" do
|
17
|
+
|
18
|
+
context "without a given path" do
|
19
|
+
subject { command('wu-deploy', 'new') }
|
20
|
+
it { should exit_with(:non_zero) }
|
21
|
+
it "prints an error message" do
|
22
|
+
should have_stderr(/path/)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context "with a given path" do
|
27
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack")) }
|
28
|
+
it { should exit_with(0) }
|
29
|
+
it "prints the files its creating" do
|
30
|
+
should have_stdout(/create.*config/, /create.*Gemfile/, /create.*\.gitignore/)
|
31
|
+
end
|
32
|
+
it "creates files on disk" do
|
33
|
+
subject.run!
|
34
|
+
Dir[examples_dir('deploy_pack', '*')].should_not be_empty
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
context "with the --dry_run flag" do
|
39
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack"), '--dry_run') }
|
40
|
+
it { should exit_with(0) }
|
41
|
+
it "prints the files its creating" do
|
42
|
+
should have_stdout(/create.*config/, /create.*Gemfile/, /create.*\.gitignore/)
|
43
|
+
end
|
44
|
+
it "doesn't create files on disk" do
|
45
|
+
subject.run!
|
46
|
+
Dir[examples_dir('deploy_pack', '*')].should be_empty
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
context "on top of an existing deploy pack" do
|
51
|
+
before { command('wu-deploy', 'new', examples_dir("deploy_pack")).run! }
|
52
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack")) }
|
53
|
+
it { should exit_with(0) }
|
54
|
+
it "prints the files its creating and which ones are the same" do
|
55
|
+
should have_stdout(/create.*config/, /same.*Gemfile/, /same.*\.gitignore/)
|
56
|
+
end
|
57
|
+
context "with conflicts" do
|
58
|
+
before do
|
59
|
+
File.open(examples_dir("deploy_pack", "Gemfile"), 'w') { |f| f.puts "new content" }
|
60
|
+
end
|
61
|
+
context "that are skipped by hand" do
|
62
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack")) < "n" }
|
63
|
+
it { should exit_with(0) }
|
64
|
+
it "prints the files it skipped" do
|
65
|
+
should have_stdout(/create.*config/, /skip.*Gemfile/, /same.*\.gitignore/)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
context "that are automatically skipped" do
|
69
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack"), "--skip") }
|
70
|
+
it { should exit_with(0) }
|
71
|
+
it "prints the files it skipped" do
|
72
|
+
should have_stdout(/create.*config/, /skip.*Gemfile/, /same.*\.gitignore/)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
context "that are replaced" do
|
76
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack")) < "y" }
|
77
|
+
it { should exit_with(0) }
|
78
|
+
it "prints the files it replaced" do
|
79
|
+
should have_stdout(/create.*config/, /replace.*Gemfile/, /same.*\.gitignore/)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
context "that are automatically replaced" do
|
83
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack"), "--force") }
|
84
|
+
it { should exit_with(0) }
|
85
|
+
it "prints the files it replaced" do
|
86
|
+
should have_stdout(/create.*config/, /replace.*Gemfile/, /same.*\.gitignore/)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
data/templates/README.md.erb
CHANGED
@@ -1 +1,214 @@
|
|
1
|
-
Welcome to
|
1
|
+
# Welcome to the Infochimps Platform!
|
2
|
+
|
3
|
+
The [Infochimps Platform](http://www.infochimps.com) is an end-to-end,
|
4
|
+
managed solution for building Big Data applications. It integrates
|
5
|
+
best-of-breed technologies like [Hadoop](http://hadoop.apache.org/),
|
6
|
+
[Storm](https://github.com/nathanmarz/storm),
|
7
|
+
[Kafka](http://incubator.apache.org/kafka/),
|
8
|
+
[MongoDB](http://www.mongodb.org/),
|
9
|
+
[ElasticSearch](http://www.elasticsearch.org/),
|
10
|
+
[HBase](http://hbase.apache.org/), &c. and provides simple interfaces
|
11
|
+
for accessing these powerful tools.
|
12
|
+
|
13
|
+
Computation, analytics, scripting, &c. are all handled by
|
14
|
+
[Wukong](http://github.com/infochimps-labs/wukong) within the
|
15
|
+
platform. Wukong is an abstract framework for defining computations
|
16
|
+
on data. Wukong processors and flows can run in many different
|
17
|
+
execution contexts including:
|
18
|
+
|
19
|
+
* locally on the command-line for testing or development purposes
|
20
|
+
* as a Hadoop mapper or reducer for batch analytics or ETL
|
21
|
+
* within Storm as part of a real-time data flow
|
22
|
+
|
23
|
+
The Infochimps Platform uses the concept of a deploy pack for
|
24
|
+
developers to develop all their processors, flows, and jobs within.
|
25
|
+
The deploy pack can be thought of as a container for all the necessary
|
26
|
+
Wukong code and plugins useful in the context of an Infochimps
|
27
|
+
Platform application. It includes the following libraries:
|
28
|
+
|
29
|
+
* <a href="http://github.com/infochimps-labs/wukong/tree/3.0.0">wukong</a>: The core framework for writing processors and chaining them together.
|
30
|
+
* <a href="http://github.com/infochimps-labs/wukong-hadoop">wukong-hadoop</a>: Run Wukong processors as mappers and reducers within the Hadoop framework. Model Hadoop jobs locally before you run them.
|
31
|
+
* <a href="http://github.com/infochimps-labs/wonderdog">wonderdog</a>: Connect Wukong processors running within Hadoop to Elasticsearch as either a source or sink for data.
|
32
|
+
* <a href="http://github.com/infochimps-labs/wukong-deploy">wukong-deploy</a>: Code for coordinating Wukong and its plugins in a deploy pack.
|
33
|
+
|
34
|
+
**This is your deploy pack!** You will build your data processing
|
35
|
+
pipelines and Hadoop jobs within this repo.
|
36
|
+
|
37
|
+
## Setup
|
38
|
+
|
39
|
+
### Dependencies
|
40
|
+
|
41
|
+
In order to install and run a deploy pack you need the following
|
42
|
+
dependencies:
|
43
|
+
|
44
|
+
#### Ruby 1.9.x
|
45
|
+
|
46
|
+
Wukong and the deploy pack framework will only run on Ruby 1.9. There
|
47
|
+
are a lot of [online
|
48
|
+
instructions](http://www.ruby-lang.org/en/downloads/) you can use to
|
49
|
+
get Ruby 1.9 (and RubyGems) installed and configured on your local
|
50
|
+
system.
|
51
|
+
|
52
|
+
If you use [rvm](https://rvm.io/) or
|
53
|
+
[rbenv](https://github.com/sstephenson/rbenv) to manage your Ruby
|
54
|
+
installations, make sure you install all gems appropriately and invoke
|
55
|
+
bundler appropriately in what follows.
|
56
|
+
|
57
|
+
#### Git
|
58
|
+
|
59
|
+
You'll need [Git](http://git-scm.com/) to push/pull your deploy pack
|
60
|
+
code to/from the Infochimps Platform.
|
61
|
+
|
62
|
+
### Creating/Cloning the Deploy Pack
|
63
|
+
|
64
|
+
The first thing you need to do to get started is get a local copy of
|
65
|
+
this deploy pack on your computer. If you have already been given a
|
66
|
+
deploy pack by Infochimps then you'll want to clone it:
|
67
|
+
|
68
|
+
```
|
69
|
+
$ git clone <your-deploy-pack-git-url>
|
70
|
+
```
|
71
|
+
|
72
|
+
If you are creating a deploy pack from scratch you'll want to use the
|
73
|
+
`wu-deploy` tool to create the scaffold of your deploy pack for you:
|
74
|
+
|
75
|
+
```
|
76
|
+
$ sudo gem install wukong-deploy
|
77
|
+
$ wu-deploy new <my-app-name>
|
78
|
+
```
|
79
|
+
|
80
|
+
Once you have the deploy pack on disk, you can install the
|
81
|
+
dependencies and configure it as described in the sections below.
|
82
|
+
|
83
|
+
### Installation
|
84
|
+
|
85
|
+
From within the root of your deploy pack run the following commands
|
86
|
+
|
87
|
+
```
|
88
|
+
$ sudo gem install bundler
|
89
|
+
$ bundle install --standalone
|
90
|
+
```
|
91
|
+
|
92
|
+
If you're using [rbenv](https://github.com/sstephenson/rbenv) you may
|
93
|
+
want to run `rbenv exec bundle install --standalone`.
|
94
|
+
|
95
|
+
Bundler will install all the necessary dependencies locally in a
|
96
|
+
directory called `bundle`. We use a `standalone` installation of your
|
97
|
+
application bundle because this makes it easier to connect code in the
|
98
|
+
deploy pack to frameworks like Hadoop, Storm, &c. when your code is
|
99
|
+
running within the Infochimps Platform.
|
100
|
+
|
101
|
+
### Configuration
|
102
|
+
|
103
|
+
Your deploy pack doesn't need any configuration out of the box. As
|
104
|
+
you begin to extend it you may add functionality which benefits from
|
105
|
+
the ability to be configured.
|
106
|
+
|
107
|
+
Put any configuration you want shared across all environments into the
|
108
|
+
file `config/settings.yml`. Override this with environment-specific
|
109
|
+
configuration in the appropriate file within `config/environments`.
|
110
|
+
|
111
|
+
As an example, you may write a processor like this:
|
112
|
+
|
113
|
+
```ruby
|
114
|
+
Wukong.processor(:configurable_decorator) do
|
115
|
+
field :suffix, String, :default => '.'
|
116
|
+
def process record
|
117
|
+
yield [record, suffix].join
|
118
|
+
end
|
119
|
+
end
|
120
|
+
```
|
121
|
+
|
122
|
+
This processor's `suffix` property can be set on the command-line:
|
123
|
+
|
124
|
+
```
|
125
|
+
$ cat input
|
126
|
+
1
|
127
|
+
2
|
128
|
+
3
|
129
|
+
$ cat input | wu-local configurable_decorator
|
130
|
+
1.
|
131
|
+
2.
|
132
|
+
3.
|
133
|
+
$ cat input | wu-local configurable_decorator --suffix=','
|
134
|
+
1,
|
135
|
+
2,
|
136
|
+
3,
```
|
137
|
+
|
138
|
+
You can also set the same property in a configuration file, scoped by
|
139
|
+
the name of the processor:
|
140
|
+
|
141
|
+
```yaml
|
142
|
+
# in config/settings.yml
|
143
|
+
---
|
144
|
+
|
145
|
+
configurable_decorator:
|
146
|
+
suffix: ,
|
147
|
+
```
|
148
|
+
|
149
|
+
which lets you omit the `--suffix` flag on the command-line while still
|
150
|
+
overriding the default setting. You can also put such settings in
|
151
|
+
environment specific files within `config/environments`.
|
152
|
+
|
153
|
+
## File Structure
|
154
|
+
|
155
|
+
A deploy pack is a repository with the following
|
156
|
+
[Rails](http://rubyonrails.org/)-like file structure:
|
157
|
+
|
158
|
+
```
|
159
|
+
├── app
|
160
|
+
│ ├── models
|
161
|
+
│ ├── processors
|
162
|
+
│ ├── flows
|
163
|
+
│ └── jobs
|
164
|
+
├── config
|
165
|
+
│ ├── environment.rb
|
166
|
+
│ ├── application.rb
|
167
|
+
│ ├── initializers
|
168
|
+
│ ├── settings.yml
|
169
|
+
│ └── environments
|
170
|
+
│ ├── development.yml
|
171
|
+
│ ├── production.yml
|
172
|
+
│ └── test.yml
|
173
|
+
├── data
|
174
|
+
├── Gemfile
|
175
|
+
├── Gemfile.lock
|
176
|
+
├── lib
|
177
|
+
├── log
|
178
|
+
├── Rakefile
|
179
|
+
├── spec
|
180
|
+
│ ├── spec_helper.rb
|
181
|
+
│ └── support
|
182
|
+
└── tmp
|
183
|
+
```
|
184
|
+
|
185
|
+
Let's look at it piece by piece:
|
186
|
+
|
187
|
+
* <b>app</b>: The directory with all the action. It's where you define:
|
188
|
+
* <b>models</b>: Your domain models or "nouns", which define and wrap the different kinds of data elements in your application. They are built using whatever framework you like (defaults to [Gorillib](http://github.com/infochimps-labs/gorillib))
|
189
|
+
* <b>processors</b>: Your fundamental operations or "verbs", which are passed records and parse, filter, augment, normalize, or split them.
|
190
|
+
* <b>flows</b>: Chain together processors into streaming flows for ingestion, real-time processing, or [complex event processing](http://en.wikipedia.org/wiki/Complex_event_processing) (CEP)
|
191
|
+
* <b>jobs</b>: Pair processors together to create batch jobs to run in Hadoop
|
192
|
+
* <b>config</b>: Where you place all application configuration for all environments
|
193
|
+
* <b>environment.rb</b>: Defines the runtime environment for all code, requiring and configuring all Wukong framework code. You shouldn't have to edit this file directly.
|
194
|
+
* <b>application.rb</b>: Require and configure libraries specific to your application. Choose a model framework, pick what application code gets loaded by default (vs. auto-loaded).
|
195
|
+
* <b>initializers</b>: Holds any files you need to load before <b>application.rb</b>. Useful for requiring and configuring external libraries.
|
196
|
+
* <b>settings.yml</b>: Defines application-wide settings.
|
197
|
+
* <b>environments</b>: Defines environment-specific settings in YAML files named after the environment. Overrides <b>config/settings.yml</b>.
|
198
|
+
* <b>data</b>: Holds sample data in flat files. You'll develop and test your application using this data.
|
199
|
+
* <b>Gemfile</b> and <b>Gemfile.lock</b>: Defines how libraries are resolved with [Bundler](http://gembundler.com/).
|
200
|
+
* <b>lib</b>: Holds any code you want to use in your application but that isn't "part of" your application (like vendored libraries, Rake tasks, &c.).
|
201
|
+
* <b>log</b>: A good place to stash logs.
|
202
|
+
* <b>Rakefile</b>: Defines [Rake](http://rake.rubyforge.org/) tasks for the development, test, and deploy of your application.
|
203
|
+
* <b>spec</b>: Holds all your [RSpec](http://rspec.info/) unit tests.
|
204
|
+
* <b>spec_helper.rb</b>: Loads libraries you'll use during testing, includes spec helper libraries from Wukong.
|
205
|
+
* <b>support</b>: Holds support code for your tests.
|
206
|
+
* <b>tmp</b>: A good place to stash temporary files.
|
207
|
+
|
208
|
+
## Writing your first models, processors, flows, and jobs
|
209
|
+
|
210
|
+
Before you start developing, it might be helpful to read up on some of
|
211
|
+
the underlying documentation for Wukong and its plugins, specifically:
|
212
|
+
|
213
|
+
* on [Wukong](http://github.com/infochimps-labs/wukong/tree/3.0.0) so you understand the basic idea of a processor and how to glue processors together
|
214
|
+
* on [Wukong-Hadoop](http://github.com/infochimps-labs/wukong-hadoop) so you understand how to move between local and Hadoop modes for batch analytics
|
@@ -62,5 +62,3 @@ require 'gorillib/object/blank'
|
|
62
62
|
Dir[File.expand_path('../../app/models/**/*.rb', __FILE__)].each { |path| require(path) }
|
63
63
|
Dir[File.expand_path('../../app/processors/**/*.rb', __FILE__)].each { |path| require(path) }
|
64
64
|
Dir[File.expand_path('../../app/flows/**/*.rb', __FILE__)].each { |path| require(path) }
|
65
|
-
Dir[File.expand_path('../../app/jobs/**/*.rb', __FILE__)].each { |path| require(path) }
|
66
|
-
Dir[File.expand_path('../../app/**/*.rb', __FILE__)].each { |path| require(path) }
|
data/wukong-deploy.gemspec
CHANGED
@@ -35,13 +35,11 @@ Gem::Specification.new do |gem|
|
|
35
35
|
gem.test_files = gem.files.grep(/^spec/)
|
36
36
|
gem.require_paths = ['lib']
|
37
37
|
|
38
|
-
gem.add_dependency('wukong',
|
39
|
-
gem.add_dependency('wukong-hadoop')
|
40
|
-
gem.add_dependency('wonderdog')
|
38
|
+
gem.add_dependency('wukong', '3.0.0.pre3')
|
39
|
+
gem.add_dependency('wukong-hadoop', '>= 0.0.2')
|
40
|
+
gem.add_dependency('wonderdog', '>= 0.0.2')
|
41
41
|
gem.add_dependency('erubis')
|
42
|
-
|
42
|
+
gem.add_dependency('diffy')
|
43
43
|
gem.add_dependency('rake', '~> 0.9')
|
44
|
-
gem.add_development_dependency 'rspec', '~> 2'
|
45
|
-
|
46
44
|
end
|
47
45
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong-deploy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-12-
|
14
|
+
date: 2012-12-17 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: wukong
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - '='
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version: 3.0.0.
|
23
|
+
version: 3.0.0.pre3
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
26
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -28,7 +28,7 @@ dependencies:
|
|
28
28
|
requirements:
|
29
29
|
- - '='
|
30
30
|
- !ruby/object:Gem::Version
|
31
|
-
version: 3.0.0.
|
31
|
+
version: 3.0.0.pre3
|
32
32
|
- !ruby/object:Gem::Dependency
|
33
33
|
name: wukong-hadoop
|
34
34
|
requirement: !ruby/object:Gem::Requirement
|
@@ -36,7 +36,7 @@ dependencies:
|
|
36
36
|
requirements:
|
37
37
|
- - ! '>='
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version:
|
39
|
+
version: 0.0.2
|
40
40
|
type: :runtime
|
41
41
|
prerelease: false
|
42
42
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - ! '>='
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 0.0.2
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: wonderdog
|
50
50
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
55
|
+
version: 0.0.2
|
56
56
|
type: :runtime
|
57
57
|
prerelease: false
|
58
58
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -60,7 +60,7 @@ dependencies:
|
|
60
60
|
requirements:
|
61
61
|
- - ! '>='
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version:
|
63
|
+
version: 0.0.2
|
64
64
|
- !ruby/object:Gem::Dependency
|
65
65
|
name: erubis
|
66
66
|
requirement: !ruby/object:Gem::Requirement
|
@@ -78,37 +78,37 @@ dependencies:
|
|
78
78
|
- !ruby/object:Gem::Version
|
79
79
|
version: '0'
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
|
-
name:
|
81
|
+
name: diffy
|
82
82
|
requirement: !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
|
-
- -
|
85
|
+
- - ! '>='
|
86
86
|
- !ruby/object:Gem::Version
|
87
|
-
version: '0
|
87
|
+
version: '0'
|
88
88
|
type: :runtime
|
89
89
|
prerelease: false
|
90
90
|
version_requirements: !ruby/object:Gem::Requirement
|
91
91
|
none: false
|
92
92
|
requirements:
|
93
|
-
- -
|
93
|
+
- - ! '>='
|
94
94
|
- !ruby/object:Gem::Version
|
95
|
-
version: '0
|
95
|
+
version: '0'
|
96
96
|
- !ruby/object:Gem::Dependency
|
97
|
-
name:
|
97
|
+
name: rake
|
98
98
|
requirement: !ruby/object:Gem::Requirement
|
99
99
|
none: false
|
100
100
|
requirements:
|
101
101
|
- - ~>
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
104
|
-
type: :
|
103
|
+
version: '0.9'
|
104
|
+
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
none: false
|
108
108
|
requirements:
|
109
109
|
- - ~>
|
110
110
|
- !ruby/object:Gem::Version
|
111
|
-
version: '
|
111
|
+
version: '0.9'
|
112
112
|
description: ! " The Infochimps Platform is an end-to-end, managed solution for\n
|
113
113
|
\ building Big Data applications. It integrates best-of-breed\n technologies like
|
114
114
|
Hadoop, Storm, Kafka, MongoDB, ElasticSearch,\n HBase, &c. and provides simple
|
@@ -132,6 +132,7 @@ files:
|
|
132
132
|
- README.md
|
133
133
|
- Rakefile
|
134
134
|
- bin/wu-deploy
|
135
|
+
- examples/.gitkeep
|
135
136
|
- lib/wukong-deploy.rb
|
136
137
|
- lib/wukong-deploy/configuration.rb
|
137
138
|
- lib/wukong-deploy/console.rb
|
@@ -140,7 +141,13 @@ files:
|
|
140
141
|
- lib/wukong-deploy/repo.rb
|
141
142
|
- lib/wukong-deploy/tasks.rb
|
142
143
|
- lib/wukong-deploy/templater.rb
|
144
|
+
- lib/wukong-deploy/templater/conflict_resolution.rb
|
145
|
+
- lib/wukong-deploy/templater/differ.rb
|
146
|
+
- lib/wukong-deploy/templater/messaging.rb
|
143
147
|
- lib/wukong-deploy/version.rb
|
148
|
+
- spec/spec_helper.rb
|
149
|
+
- spec/support/integration_helper.rb
|
150
|
+
- spec/wukong-deploy/wu_deploy_spec.rb
|
144
151
|
- templates/Gemfile.erb
|
145
152
|
- templates/README.md.erb
|
146
153
|
- templates/Rakefile.erb
|
@@ -179,5 +186,8 @@ rubygems_version: 1.8.23
|
|
179
186
|
signing_key:
|
180
187
|
specification_version: 3
|
181
188
|
summary: Defines the deploy pack framework used by the Infochimps Platform
|
182
|
-
test_files:
|
189
|
+
test_files:
|
190
|
+
- spec/spec_helper.rb
|
191
|
+
- spec/support/integration_helper.rb
|
192
|
+
- spec/wukong-deploy/wu_deploy_spec.rb
|
183
193
|
has_rdoc:
|