wukong-deploy 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +6 -1
- data/README.md +47 -0
- data/examples/.gitkeep +0 -0
- data/lib/wukong-deploy/configuration.rb +24 -0
- data/lib/wukong-deploy/driver.rb +1 -3
- data/lib/wukong-deploy/templater.rb +50 -11
- data/lib/wukong-deploy/templater/conflict_resolution.rb +57 -0
- data/lib/wukong-deploy/templater/differ.rb +90 -0
- data/lib/wukong-deploy/templater/messaging.rb +32 -0
- data/lib/wukong-deploy/version.rb +1 -1
- data/spec/spec_helper.rb +10 -0
- data/spec/support/integration_helper.rb +38 -0
- data/spec/wukong-deploy/wu_deploy_spec.rb +94 -0
- data/templates/README.md.erb +214 -1
- data/templates/config/application.rb.erb +0 -2
- data/wukong-deploy.gemspec +4 -6
- metadata +28 -18
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -38,6 +38,53 @@ The deploy pack is installed as a RubyGem:
|
|
38
38
|
$ sudo gem install wukong-deploy
|
39
39
|
```
|
40
40
|
|
41
|
+
## Usage
|
42
|
+
|
43
|
+
Wukong-Deploy provides a command-line tool `wu-deploy` which can be
|
44
|
+
used to create or interact with deploy packs.
|
45
|
+
|
46
|
+
### Creating a New Deploy Pack
|
47
|
+
|
48
|
+
Create a new deploy pack:
|
49
|
+
|
50
|
+
```
|
51
|
+
$ wu-deploy new my_app
|
52
|
+
Within /home/user/my_app:
|
53
|
+
create .
|
54
|
+
create app/models
|
55
|
+
create app/processors
|
56
|
+
...
|
57
|
+
```
|
58
|
+
|
59
|
+
This will create a directory `my_app` in the current directory.
|
60
|
+
Passing the `dry_run` option will print what should happen without
|
61
|
+
actually doing anything:
|
62
|
+
|
63
|
+
```
|
64
|
+
$ wu-deploy new my_app --dry_run
|
65
|
+
Within /home/user/my_app:
|
66
|
+
create .
|
67
|
+
create app/models
|
68
|
+
create app/processors
|
69
|
+
...
|
70
|
+
```
|
71
|
+
|
72
|
+
You'll be prompted if there is a conflict. You can pass the `force`
|
73
|
+
option to always overwrite files and the `skip` option to never
|
74
|
+
overwrite files.
|
75
|
+
|
76
|
+
### Working with an Existing Deploy Pack
|
77
|
+
|
78
|
+
If your current directory is within an existing deploy pack you can
|
79
|
+
start up an IRB console with the deploy pack's environment already
|
80
|
+
loaded:
|
81
|
+
|
82
|
+
```
|
83
|
+
$ wu-deploy console
|
84
|
+
irb(main):001:0>
|
85
|
+
```
|
86
|
+
|
87
|
+
|
41
88
|
## File Structure
|
42
89
|
|
43
90
|
A deploy pack is a repository with the following
|
data/examples/.gitkeep
ADDED
File without changes
|
@@ -12,6 +12,30 @@ module Wukong
|
|
12
12
|
case executable
|
13
13
|
when 'wu-hadoop'
|
14
14
|
Wukong::Elasticsearch.configure(settings) if executable == 'wu-hadoop'
|
15
|
+
when 'wu-deploy'
|
16
|
+
settings.define(:dry_run, :description => "Don't actually create or modify anything", :type => :boolean, :default => false)
|
17
|
+
settings.define(:skip, :description => "Skip existing files", :type => :boolean, :default => false)
|
18
|
+
settings.define(:force, :description => "Overwrite existing files", :type => :boolean, :default => false)
|
19
|
+
def settings.usage
|
20
|
+
"usage: wu-deploy ACTION [ --param=val | --param | -p val | -p ] ..."
|
21
|
+
end
|
22
|
+
settings.use(:commandline)
|
23
|
+
settings.description = <<EOF
|
24
|
+
wu-deploy is a tool for creating and interacting with deploy packs.
|
25
|
+
|
26
|
+
You can create a new deploy pack
|
27
|
+
|
28
|
+
$ wu-deploy new my_app
|
29
|
+
|
30
|
+
The `--force' and `--skip' options can be used to control how conflict
|
31
|
+
resolution works when creating files. The `--dry_run` option can be
|
32
|
+
used to see what happens without doing it.
|
33
|
+
|
34
|
+
If you are within the directory of a deploy pack, you can enter an IRB
|
35
|
+
console with the deploy pack's environment already loaded:
|
36
|
+
|
37
|
+
$ wu-deploy console
|
38
|
+
EOF
|
15
39
|
end
|
16
40
|
settings
|
17
41
|
end
|
data/lib/wukong-deploy/driver.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
1
|
module Wukong
|
4
2
|
module Deploy
|
5
3
|
class Driver
|
@@ -21,7 +19,7 @@ module Wukong
|
|
21
19
|
when 'new'
|
22
20
|
require_relative('templater')
|
23
21
|
raise Error.new("Must provide a path to the root of the deploy pack you want to create") if args[1].nil? || args[1].blank?
|
24
|
-
Templater.new(File.expand_path(args[1], Dir.pwd)).
|
22
|
+
Templater.new(File.expand_path(args[1], Dir.pwd), settings).run!
|
25
23
|
when 'console'
|
26
24
|
require_relative('console')
|
27
25
|
Wukong::Deploy::Console.new.run!
|
@@ -3,6 +3,8 @@ require 'fileutils'
|
|
3
3
|
require 'erubis'
|
4
4
|
|
5
5
|
require_relative('repo')
|
6
|
+
require_relative('templater/messaging')
|
7
|
+
require_relative('templater/conflict_resolution')
|
6
8
|
|
7
9
|
module Wukong
|
8
10
|
module Deploy
|
@@ -11,22 +13,35 @@ module Wukong
|
|
11
13
|
attr_accessor :repo
|
12
14
|
attr_accessor :options
|
13
15
|
|
14
|
-
include FileUtils
|
16
|
+
include FileUtils
|
17
|
+
include Messaging
|
18
|
+
include ConflictResolution
|
15
19
|
|
16
20
|
def initialize root, options={}
|
17
21
|
self.repo = Repo.new(root)
|
18
22
|
self.options = options
|
19
23
|
end
|
20
24
|
|
21
|
-
def
|
25
|
+
def dry_run?
|
26
|
+
@options[:dry_run]
|
27
|
+
end
|
28
|
+
|
29
|
+
def run!
|
30
|
+
if dry_run?
|
31
|
+
puts "Would perform the following actions in #{repo.root}"
|
32
|
+
else
|
33
|
+
puts "Within #{repo.root}:"
|
34
|
+
end
|
22
35
|
create_dirs
|
23
36
|
create_templates
|
24
37
|
create_gitkeeps
|
25
38
|
create_gitignore
|
26
39
|
end
|
27
|
-
|
40
|
+
|
28
41
|
def create_dirs
|
29
|
-
repo.dirs_to_create.each
|
42
|
+
repo.dirs_to_create.each do |dir|
|
43
|
+
create_directory(dir)
|
44
|
+
end
|
30
45
|
end
|
31
46
|
|
32
47
|
def create_templates
|
@@ -36,18 +51,16 @@ module Wukong
|
|
36
51
|
end
|
37
52
|
|
38
53
|
def create_template input_path, output_path, binding={}
|
39
|
-
input
|
40
|
-
erb
|
41
|
-
|
42
|
-
|
43
|
-
puts "#{action} #{output_path}"
|
44
|
-
File.open(output_path, 'w') { |f| f.puts(output) }
|
54
|
+
input = File.read(input_path)
|
55
|
+
erb = Erubis::Eruby.new(input)
|
56
|
+
content = erb.result(binding)
|
57
|
+
create_file(content, output_path)
|
45
58
|
end
|
46
59
|
|
47
60
|
def create_gitkeeps
|
48
61
|
repo.dirs_to_create.each do |dir|
|
49
62
|
if Dir[File.join(dir, '*')].empty?
|
50
|
-
|
63
|
+
create_file(empty_file, File.join(dir, '.gitkeep'))
|
51
64
|
end
|
52
65
|
end
|
53
66
|
end
|
@@ -59,7 +72,33 @@ module Wukong
|
|
59
72
|
def templates_dir
|
60
73
|
@templates_dir ||= Pathname.new(File.expand_path('../../../templates', __FILE__))
|
61
74
|
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
def empty_file
|
79
|
+
""
|
80
|
+
end
|
62
81
|
|
82
|
+
def create_file content, path
|
83
|
+
if File.exist?(path)
|
84
|
+
handle_conflict(content, path)
|
85
|
+
else
|
86
|
+
message_create(path)
|
87
|
+
write_file(content, path)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def create_directory(dir)
|
92
|
+
message_create(dir)
|
93
|
+
return if dry_run?
|
94
|
+
mkdir_p(dir)
|
95
|
+
end
|
96
|
+
|
97
|
+
def write_file content, path
|
98
|
+
return if dry_run?
|
99
|
+
File.open(path, 'w') { |f| f.write(content) }
|
100
|
+
end
|
101
|
+
|
63
102
|
end
|
64
103
|
end
|
65
104
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require_relative("differ")
|
2
|
+
|
3
|
+
module Wukong
|
4
|
+
module Deploy
|
5
|
+
module ConflictResolution
|
6
|
+
|
7
|
+
def handle_conflict(new_content, path)
|
8
|
+
existing_content = File.read(path)
|
9
|
+
case
|
10
|
+
when new_content == existing_content
|
11
|
+
message_same(path)
|
12
|
+
when always_replace?
|
13
|
+
message_replace(path)
|
14
|
+
write_file(new_content, path)
|
15
|
+
when never_replace?
|
16
|
+
message_skip(path)
|
17
|
+
else
|
18
|
+
message_conflict(path)
|
19
|
+
diff!(new_content, existing_content, path)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def always_replace?
|
24
|
+
@always_replace || options[:force]
|
25
|
+
end
|
26
|
+
|
27
|
+
def always_replace!
|
28
|
+
@always_replace = true
|
29
|
+
end
|
30
|
+
|
31
|
+
def never_replace?
|
32
|
+
@never_replace || options[:skip]
|
33
|
+
end
|
34
|
+
|
35
|
+
def never_replace!
|
36
|
+
@never_replace = true
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
# :nodoc:
|
42
|
+
def diff! new_content, existing_content, path
|
43
|
+
differ = Differ.new(new_content, existing_content)
|
44
|
+
differ.resolve!
|
45
|
+
always_replace! if differ.always_replace?
|
46
|
+
never_replace! if differ.never_replace?
|
47
|
+
if differ.replace?
|
48
|
+
message_replace(path)
|
49
|
+
write_file(new_content, path)
|
50
|
+
else
|
51
|
+
message_skip(path)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'diffy'
|
2
|
+
|
3
|
+
module Wukong
|
4
|
+
module Deploy
|
5
|
+
class Differ
|
6
|
+
|
7
|
+
attr_accessor :new_content
|
8
|
+
attr_accessor :existing_content
|
9
|
+
|
10
|
+
def initialize new_content, existing_content
|
11
|
+
self.new_content = new_content
|
12
|
+
self.existing_content = existing_content
|
13
|
+
end
|
14
|
+
|
15
|
+
def replace?
|
16
|
+
@replace
|
17
|
+
end
|
18
|
+
|
19
|
+
def always_replace?
|
20
|
+
@always_replace
|
21
|
+
end
|
22
|
+
|
23
|
+
def never_replace?
|
24
|
+
@never_replace
|
25
|
+
end
|
26
|
+
|
27
|
+
def resolve!
|
28
|
+
response = get_response
|
29
|
+
case response
|
30
|
+
when /^y/
|
31
|
+
@replace = true
|
32
|
+
when /^Y/
|
33
|
+
@replace = true
|
34
|
+
@always_replace = true
|
35
|
+
when /^n/
|
36
|
+
@replace = false
|
37
|
+
when /^N/
|
38
|
+
@replace = false
|
39
|
+
@never_replace = true
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def diff
|
44
|
+
@diff = Diffy::Diff.new(new_content, existing_content, :allow_empty_diff => true, :diff => "-U 5", :include_diff_info => true)
|
45
|
+
end
|
46
|
+
|
47
|
+
def show_diff
|
48
|
+
puts ''
|
49
|
+
diff.each do |line|
|
50
|
+
puts " #{line}"
|
51
|
+
end
|
52
|
+
puts ''
|
53
|
+
end
|
54
|
+
|
55
|
+
def show_help
|
56
|
+
puts " Y - yes, overwrite this file and all other conflicts"
|
57
|
+
puts " y - yes, overwrite this file"
|
58
|
+
puts " N - no, skip this file and all other conflicts"
|
59
|
+
puts " n - no, skip this file"
|
60
|
+
puts " d - diff, show the differences between the existing file and the new file"
|
61
|
+
puts " q - quit, abort"
|
62
|
+
puts " h - help, show this help"
|
63
|
+
end
|
64
|
+
|
65
|
+
def get_response
|
66
|
+
STDOUT.write ' Overwrite? (enter "h" for help) [YNynqdh]: '
|
67
|
+
begin
|
68
|
+
response = STDIN.readline.chomp.strip
|
69
|
+
rescue EOFError, Interrupt => e
|
70
|
+
exit(1)
|
71
|
+
end
|
72
|
+
case
|
73
|
+
when response =~ /^(y|n|Y|N)/
|
74
|
+
response
|
75
|
+
when response =~ /^q/i
|
76
|
+
exit(1)
|
77
|
+
when response =~ /^d/i
|
78
|
+
show_diff
|
79
|
+
get_response
|
80
|
+
when response =~ /^h/i
|
81
|
+
show_help
|
82
|
+
get_response
|
83
|
+
else
|
84
|
+
get_response
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
module Messaging
|
4
|
+
|
5
|
+
def message action, *objects
|
6
|
+
puts [action.rjust(25), ' ', objects.map { |path| Pathname.new(path).relative_path_from(repo.root).to_s }.join(' ')].join
|
7
|
+
end
|
8
|
+
|
9
|
+
def message_create *objects
|
10
|
+
message "\e[32m\e[1mcreate\e[0m", *objects
|
11
|
+
end
|
12
|
+
|
13
|
+
def message_conflict *objects
|
14
|
+
message "\e[31m\e[1mconflict\e[0m", *objects
|
15
|
+
end
|
16
|
+
|
17
|
+
def message_replace *objects
|
18
|
+
message "\e[31m\e[1mreplace\e[0m", *objects
|
19
|
+
end
|
20
|
+
|
21
|
+
def message_same *objects
|
22
|
+
message "\e[34m\e[1msame\e[0m", *objects
|
23
|
+
end
|
24
|
+
|
25
|
+
def message_skip *objects
|
26
|
+
message "\e[35m\e[1mskip\e[0m", *objects
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'rspec'
|
2
|
+
require 'wukong-deploy'
|
3
|
+
require 'wukong/spec_helpers'
|
4
|
+
require_relative './support/integration_helper'
|
5
|
+
|
6
|
+
RSpec.configure do |config|
|
7
|
+
config.mock_with :rspec
|
8
|
+
include Wukong::SpecHelpers
|
9
|
+
include Wukong::Deploy::IntegrationHelper
|
10
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
module IntegrationHelper
|
4
|
+
|
5
|
+
def root
|
6
|
+
@root ||= Pathname.new(File.expand_path('../../..', __FILE__))
|
7
|
+
end
|
8
|
+
|
9
|
+
def lib_dir *args
|
10
|
+
root.join('lib', *args)
|
11
|
+
end
|
12
|
+
|
13
|
+
def bin_dir *args
|
14
|
+
root.join('bin', *args)
|
15
|
+
end
|
16
|
+
|
17
|
+
def examples_dir *args
|
18
|
+
root.join('examples', *args)
|
19
|
+
end
|
20
|
+
|
21
|
+
def integration_env
|
22
|
+
{
|
23
|
+
"PATH" => [bin_dir.to_s, ENV["PATH"]].compact.join(':'),
|
24
|
+
"RUBYLIB" => [lib_dir.to_s, ENV["RUBYLIB"]].compact.join(':')
|
25
|
+
}
|
26
|
+
end
|
27
|
+
|
28
|
+
def integration_cwd
|
29
|
+
root.to_s
|
30
|
+
end
|
31
|
+
|
32
|
+
def example_script *args
|
33
|
+
examples_dir(*args)
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'wu-deploy' do
|
4
|
+
|
5
|
+
before {`rm -rf #{examples_dir('*')}` }
|
6
|
+
after {`rm -rf #{examples_dir('*')}` }
|
7
|
+
|
8
|
+
context "without arguments" do
|
9
|
+
subject { command('wu-deploy') }
|
10
|
+
it { should exit_with(:non_zero) }
|
11
|
+
it "displays a help message" do
|
12
|
+
should have_stderr(/usage: wu-deploy/)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
context "creating a deploy pack" do
|
17
|
+
|
18
|
+
context "without a given path" do
|
19
|
+
subject { command('wu-deploy', 'new') }
|
20
|
+
it { should exit_with(:non_zero) }
|
21
|
+
it "prints an error message" do
|
22
|
+
should have_stderr(/path/)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context "with a given path" do
|
27
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack")) }
|
28
|
+
it { should exit_with(0) }
|
29
|
+
it "prints the files its creating" do
|
30
|
+
should have_stdout(/create.*config/, /create.*Gemfile/, /create.*\.gitignore/)
|
31
|
+
end
|
32
|
+
it "creates files on disk" do
|
33
|
+
subject.run!
|
34
|
+
Dir[examples_dir('deploy_pack', '*')].should_not be_empty
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
context "with the --dry_run flag" do
|
39
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack"), '--dry_run') }
|
40
|
+
it { should exit_with(0) }
|
41
|
+
it "prints the files its creating" do
|
42
|
+
should have_stdout(/create.*config/, /create.*Gemfile/, /create.*\.gitignore/)
|
43
|
+
end
|
44
|
+
it "doesn't create files on disk" do
|
45
|
+
subject.run!
|
46
|
+
Dir[examples_dir('deploy_pack', '*')].should be_empty
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
context "on top of an existing deploy pack" do
|
51
|
+
before { command('wu-deploy', 'new', examples_dir("deploy_pack")).run! }
|
52
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack")) }
|
53
|
+
it { should exit_with(0) }
|
54
|
+
it "prints the files its creating and which ones are the same" do
|
55
|
+
should have_stdout(/create.*config/, /same.*Gemfile/, /same.*\.gitignore/)
|
56
|
+
end
|
57
|
+
context "with conflicts" do
|
58
|
+
before do
|
59
|
+
File.open(examples_dir("deploy_pack", "Gemfile"), 'w') { |f| f.puts "new content" }
|
60
|
+
end
|
61
|
+
context "that are skipped by hand" do
|
62
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack")) < "n" }
|
63
|
+
it { should exit_with(0) }
|
64
|
+
it "prints the files it skipped" do
|
65
|
+
should have_stdout(/create.*config/, /skip.*Gemfile/, /same.*\.gitignore/)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
context "that are automatically skipped" do
|
69
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack"), "--skip") }
|
70
|
+
it { should exit_with(0) }
|
71
|
+
it "prints the files it skipped" do
|
72
|
+
should have_stdout(/create.*config/, /skip.*Gemfile/, /same.*\.gitignore/)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
context "that are replaced" do
|
76
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack")) < "y" }
|
77
|
+
it { should exit_with(0) }
|
78
|
+
it "prints the files it replaced" do
|
79
|
+
should have_stdout(/create.*config/, /replace.*Gemfile/, /same.*\.gitignore/)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
context "that are automatically replaced" do
|
83
|
+
subject { command('wu-deploy', 'new', examples_dir("deploy_pack"), "--force") }
|
84
|
+
it { should exit_with(0) }
|
85
|
+
it "prints the files it replaced" do
|
86
|
+
should have_stdout(/create.*config/, /replace.*Gemfile/, /same.*\.gitignore/)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
data/templates/README.md.erb
CHANGED
@@ -1 +1,214 @@
|
|
1
|
-
Welcome to
|
1
|
+
# Welcome to the Infochimps Platform!
|
2
|
+
|
3
|
+
The [Infochimps Platform](http://www.infochimps.com) is an end-to-end,
|
4
|
+
managed solution for building Big Data applications. It integrates
|
5
|
+
best-of-breed technologies like [Hadoop](http://hadoop.apache.org/),
|
6
|
+
[Storm](https://github.com/nathanmarz/storm),
|
7
|
+
[Kafka](http://incubator.apache.org/kafka/),
|
8
|
+
[MongoDB](http://www.mongodb.org/),
|
9
|
+
[ElasticSearch](http://www.elasticsearch.org/),
|
10
|
+
[HBase](http://hbase.apache.org/), &c. and provides simple interfaces
|
11
|
+
for accessing these powerful tools.
|
12
|
+
|
13
|
+
Computation, analytics, scripting, &c. are all handled by
|
14
|
+
[Wukong](http://github.com/infochimps-labs/wukong) within the
|
15
|
+
platform. Wukong is an abstract framework for defining computations
|
16
|
+
on data. Wukong processors and flows can run in many different
|
17
|
+
execution contexts including:
|
18
|
+
|
19
|
+
* locally on the command-line for testing or development purposes
|
20
|
+
* as a Hadoop mapper or reducer for batch analytics or ETL
|
21
|
+
* within Storm as part of a real-time data flow
|
22
|
+
|
23
|
+
The Infochimps Platform uses the concept of a deploy pack for
|
24
|
+
developers to develop all their processors, flows, and jobs within.
|
25
|
+
The deploy pack can be thought of as a container for all the necessary
|
26
|
+
Wukong code and plugins useful in the context of an Infochimps
|
27
|
+
Platform application. It includes the following libraries:
|
28
|
+
|
29
|
+
* <a href="http://github.com/infochimps-labs/wukong/tree/3.0.0">wukong</a>: The core framework for writing processors and chaining them together.
|
30
|
+
* <a href="http://github.com/infochimps-labs/wukong-hadoop">wukong-hadoop</a>: Run Wukong processors as mappers and reducers within the Hadoop framework. Model Hadoop jobs locally before you run them.
|
31
|
+
* <a href="http://github.com/infochimps-labs/wonderdog">wonderdog</a>: Connect Wukong processors running within Hadoop to Elasticsearch as either a source or sink for data.
|
32
|
+
* <a href="http://github.com/infochimps-labs/wukong-deploy">wukong-deploy</a>: Code for coordinating Wukong and its plugins in a deploy pack.
|
33
|
+
|
34
|
+
**This is your deploy pack!** You will build your data processing
|
35
|
+
pipelines and Hadoop jobs within this repo.
|
36
|
+
|
37
|
+
## Setup
|
38
|
+
|
39
|
+
### Dependencies
|
40
|
+
|
41
|
+
In order to install and run a deploy pack you need the following
|
42
|
+
dependencies:
|
43
|
+
|
44
|
+
#### Ruby 1.9.x
|
45
|
+
|
46
|
+
Wukong and the deploy pack framework will only run on Ruby 1.9. There
|
47
|
+
are a lot of [online
|
48
|
+
instructions](http://www.ruby-lang.org/en/downloads/) you can use to
|
49
|
+
get Ruby 1.9 (and RubyGems) installed and configured on your local
|
50
|
+
system.
|
51
|
+
|
52
|
+
If you use [rvm](https://rvm.io/) or
|
53
|
+
[rbenv](https://github.com/sstephenson/rbenv) to manage your Ruby
|
54
|
+
installations, make sure you install all gems appropriately and invoke
|
55
|
+
bundler appropriately in what follows.
|
56
|
+
|
57
|
+
#### Git
|
58
|
+
|
59
|
+
You'll need [Git](http://git-scm.com/) to push/pull your deploy pack
|
60
|
+
code to/from the Infochimps Platform.
|
61
|
+
|
62
|
+
### Creating/Cloning the Deploy Pack
|
63
|
+
|
64
|
+
The first thing you need to do to get started is get a local copy of
|
65
|
+
this deploy pack on your computer. If you have already been given a
|
66
|
+
deploy pack by Infochimps then you'll want to clone it:
|
67
|
+
|
68
|
+
```
|
69
|
+
$ git clone <your-deploy-pack-git-url>
|
70
|
+
```
|
71
|
+
|
72
|
+
If you are creating a deploy pack from scratch you'll want to use the
|
73
|
+
`wu-deploy` tool to create the scaffold of your deploy pack for you:
|
74
|
+
|
75
|
+
```
|
76
|
+
$ sudo gem install wukong-deploy
|
77
|
+
$ wu-deploy new <my-app-name>
|
78
|
+
```
|
79
|
+
|
80
|
+
Once you have the deploy pack on disk, you can install the
|
81
|
+
dependencies as described below.
|
82
|
+
|
83
|
+
### Installation
|
84
|
+
|
85
|
+
From within the root of your deploy pack run the following commands:
|
86
|
+
|
87
|
+
```
|
88
|
+
$ sudo gem install bundler
|
89
|
+
$ bundle install --standalone
|
90
|
+
```
|
91
|
+
|
92
|
+
If you're using [rbenv](https://github.com/sstephenson/rbenv) you may
|
93
|
+
want to run `rbenv exec bundle install --standalone`.
|
94
|
+
|
95
|
+
Bundler will install all the necessary dependencies locally in a
|
96
|
+
directory called `bundle`. We use a `standalone` installation of your
|
97
|
+
application bundle because this makes it easier to connect code in the
|
98
|
+
deploy pack to frameworks like Hadoop, Storm, &c. when your code is
|
99
|
+
running within the Infochimps Platform.
|
100
|
+
|
101
|
+
### Configuration
|
102
|
+
|
103
|
+
Your deploy pack doesn't need any configuration out of the box. As
|
104
|
+
you begin to extend it you may add functionality which benefits from
|
105
|
+
the ability to be configured.
|
106
|
+
|
107
|
+
Put any configuration you want shared across all environments into the
|
108
|
+
file `config/settings.yml`. Override this with environment-specific
|
109
|
+
configuration in the appropriate file within `config/environments`.
|
110
|
+
|
111
|
+
As an example, you may write a processor like this:
|
112
|
+
|
113
|
+
```ruby
|
114
|
+
Wukong.processor(:configurable_decorator) do
|
115
|
+
field :suffix, String, :default => '.'
|
116
|
+
def process record
|
117
|
+
yield [record, suffix].join
|
118
|
+
end
|
119
|
+
end
|
120
|
+
```
|
121
|
+
|
122
|
+
This processor's `suffix` property can be set on the command-line:
|
123
|
+
|
124
|
+
```
|
125
|
+
$ cat input
|
126
|
+
1
|
127
|
+
2
|
128
|
+
3
|
129
|
+
$ cat input | wu-local configurable_decorator
|
130
|
+
1.
|
131
|
+
2.
|
132
|
+
3.
|
133
|
+
$ cat input | wu-local configurable_decorator --suffix=','
|
134
|
+
1,
|
135
|
+
2,
|
136
|
+
3,
|
137
|
+
|
138
|
+
You can also set the same property in a configuration file, scoped by
|
139
|
+
the name of the processor:
|
140
|
+
|
141
|
+
```yaml
|
142
|
+
# in config/settings.yml
|
143
|
+
---
|
144
|
+
|
145
|
+
configurable_decorator:
|
146
|
+
suffix: ","
|
147
|
+
```
|
148
|
+
|
149
|
+
which lets you omit the `--suffix` flag on the command-line while still
|
150
|
+
overriding the default setting. You can also put such settings in
|
151
|
+
environment specific files within `config/environments`.
|
152
|
+
|
153
|
+
## File Structure
|
154
|
+
|
155
|
+
A deploy pack is a repository with the following
|
156
|
+
[Rails](http://rubyonrails.org/)-like file structure:
|
157
|
+
|
158
|
+
```
|
159
|
+
├── app
|
160
|
+
│ ├── models
|
161
|
+
│ ├── processors
|
162
|
+
│ ├── flows
|
163
|
+
│ └── jobs
|
164
|
+
├── config
|
165
|
+
│ ├── environment.rb
|
166
|
+
│ ├── application.rb
|
167
|
+
│ ├── initializers
|
168
|
+
│ ├── settings.yml
|
169
|
+
│ └── environments
|
170
|
+
│ ├── development.yml
|
171
|
+
│ ├── production.yml
|
172
|
+
│ └── test.yml
|
173
|
+
├── data
|
174
|
+
├── Gemfile
|
175
|
+
├── Gemfile.lock
|
176
|
+
├── lib
|
177
|
+
├── log
|
178
|
+
├── Rakefile
|
179
|
+
├── spec
|
180
|
+
│ ├── spec_helper.rb
|
181
|
+
│ └── support
|
182
|
+
└── tmp
|
183
|
+
```
|
184
|
+
|
185
|
+
Let's look at it piece by piece:
|
186
|
+
|
187
|
+
* <b>app</b>: The directory with all the action. It's where you define:
|
188
|
+
* <b>models</b>: Your domain models or "nouns", which define and wrap the different kinds of data elements in your application. They are built using whatever framework you like (defaults to [Gorillib](http://github.com/infochimps-labs/gorillib))
|
189
|
+
* <b>processors</b>: Your fundamental operations or "verbs", which are passed records and parse, filter, augment, normalize, or split them.
|
190
|
+
* <b>flows</b>: Chain together processors into streaming flows for ingestion, real-time processing, or [complex event processing](http://en.wikipedia.org/wiki/Complex_event_processing) (CEP)
|
191
|
+
* <b>jobs</b>: Pair processors together to create batch jobs to run in Hadoop
|
192
|
+
* <b>config</b>: Where you place all application configuration for all environments
|
193
|
+
* <b>environment.rb</b>: Defines the runtime environment for all code, requiring and configuring all Wukong framework code. You shouldn't have to edit this file directly.
|
194
|
+
* <b>application.rb</b>: Require and configure libraries specific to your application. Choose a model framework, pick what application code gets loaded by default (vs. auto-loaded).
|
195
|
+
* <b>initializers</b>: Holds any files you need to load before <b>application.rb</b>. Useful for requiring and configuring external libraries.
|
196
|
+
* <b>settings.yml</b>: Defines application-wide settings.
|
197
|
+
* <b>environments</b>: Defines environment-specific settings in YAML files named after the environment. Overrides <b>config/settings.yml</b>.
|
198
|
+
* <b>data</b>: Holds sample data in flat files. You'll develop and test your application using this data.
|
199
|
+
* <b>Gemfile</b> and <b>Gemfile.lock</b>: Defines how libraries are resolved with [Bundler](http://gembundler.com/).
|
200
|
+
* <b>lib</b>: Holds any code you want to use in your application but that isn't "part of" your application (like vendored libraries, Rake tasks, &c.).
|
201
|
+
* <b>log</b>: A good place to stash logs.
|
202
|
+
* <b>Rakefile</b>: Defines [Rake](http://rake.rubyforge.org/) tasks for the development, test, and deploy of your application.
|
203
|
+
* <b>spec</b>: Holds all your [RSpec](http://rspec.info/) unit tests.
|
204
|
+
* <b>spec_helper.rb</b>: Loads libraries you'll use during testing, includes spec helper libraries from Wukong.
|
205
|
+
* <b>support</b>: Holds support code for your tests.
|
206
|
+
* <b>tmp</b>: A good place to stash temporary files.
|
207
|
+
|
208
|
+
## Writing your first models, processors, flows, and jobs
|
209
|
+
|
210
|
+
Before you start developing, it might be helpful to read up on some of
|
211
|
+
the underlying documentation for Wukong and its plugins, specifically:
|
212
|
+
|
213
|
+
* on [Wukong](http://github.com/infochimps-labs/wukong/tree/3.0.0) so you understand the basic idea of a processor and how to glue processors together
|
214
|
+
* on [Wukong-Hadoop](http://github.com/infochimps-labs/wukong-hadoop) so you understand how to move between local and Hadoop modes for batch analytics
|
@@ -62,5 +62,3 @@ require 'gorillib/object/blank'
|
|
62
62
|
Dir[File.expand_path('../../app/models/**/*.rb', __FILE__)].each { |path| require(path) }
|
63
63
|
Dir[File.expand_path('../../app/processors/**/*.rb', __FILE__)].each { |path| require(path) }
|
64
64
|
Dir[File.expand_path('../../app/flows/**/*.rb', __FILE__)].each { |path| require(path) }
|
65
|
-
Dir[File.expand_path('../../app/jobs/**/*.rb', __FILE__)].each { |path| require(path) }
|
66
|
-
Dir[File.expand_path('../../app/**/*.rb', __FILE__)].each { |path| require(path) }
|
data/wukong-deploy.gemspec
CHANGED
@@ -35,13 +35,11 @@ Gem::Specification.new do |gem|
|
|
35
35
|
gem.test_files = gem.files.grep(/^spec/)
|
36
36
|
gem.require_paths = ['lib']
|
37
37
|
|
38
|
-
gem.add_dependency('wukong',
|
39
|
-
gem.add_dependency('wukong-hadoop')
|
40
|
-
gem.add_dependency('wonderdog')
|
38
|
+
gem.add_dependency('wukong', '3.0.0.pre3')
|
39
|
+
gem.add_dependency('wukong-hadoop', '>= 0.0.2')
|
40
|
+
gem.add_dependency('wonderdog', '>= 0.0.2')
|
41
41
|
gem.add_dependency('erubis')
|
42
|
-
|
42
|
+
gem.add_dependency('diffy')
|
43
43
|
gem.add_dependency('rake', '~> 0.9')
|
44
|
-
gem.add_development_dependency 'rspec', '~> 2'
|
45
|
-
|
46
44
|
end
|
47
45
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong-deploy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-12-
|
14
|
+
date: 2012-12-17 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: wukong
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - '='
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version: 3.0.0.
|
23
|
+
version: 3.0.0.pre3
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
26
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -28,7 +28,7 @@ dependencies:
|
|
28
28
|
requirements:
|
29
29
|
- - '='
|
30
30
|
- !ruby/object:Gem::Version
|
31
|
-
version: 3.0.0.
|
31
|
+
version: 3.0.0.pre3
|
32
32
|
- !ruby/object:Gem::Dependency
|
33
33
|
name: wukong-hadoop
|
34
34
|
requirement: !ruby/object:Gem::Requirement
|
@@ -36,7 +36,7 @@ dependencies:
|
|
36
36
|
requirements:
|
37
37
|
- - ! '>='
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version:
|
39
|
+
version: 0.0.2
|
40
40
|
type: :runtime
|
41
41
|
prerelease: false
|
42
42
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - ! '>='
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 0.0.2
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: wonderdog
|
50
50
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
55
|
+
version: 0.0.2
|
56
56
|
type: :runtime
|
57
57
|
prerelease: false
|
58
58
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -60,7 +60,7 @@ dependencies:
|
|
60
60
|
requirements:
|
61
61
|
- - ! '>='
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version:
|
63
|
+
version: 0.0.2
|
64
64
|
- !ruby/object:Gem::Dependency
|
65
65
|
name: erubis
|
66
66
|
requirement: !ruby/object:Gem::Requirement
|
@@ -78,37 +78,37 @@ dependencies:
|
|
78
78
|
- !ruby/object:Gem::Version
|
79
79
|
version: '0'
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
|
-
name:
|
81
|
+
name: diffy
|
82
82
|
requirement: !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
|
-
- -
|
85
|
+
- - ! '>='
|
86
86
|
- !ruby/object:Gem::Version
|
87
|
-
version: '0
|
87
|
+
version: '0'
|
88
88
|
type: :runtime
|
89
89
|
prerelease: false
|
90
90
|
version_requirements: !ruby/object:Gem::Requirement
|
91
91
|
none: false
|
92
92
|
requirements:
|
93
|
-
- -
|
93
|
+
- - ! '>='
|
94
94
|
- !ruby/object:Gem::Version
|
95
|
-
version: '0
|
95
|
+
version: '0'
|
96
96
|
- !ruby/object:Gem::Dependency
|
97
|
-
name:
|
97
|
+
name: rake
|
98
98
|
requirement: !ruby/object:Gem::Requirement
|
99
99
|
none: false
|
100
100
|
requirements:
|
101
101
|
- - ~>
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
104
|
-
type: :
|
103
|
+
version: '0.9'
|
104
|
+
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
none: false
|
108
108
|
requirements:
|
109
109
|
- - ~>
|
110
110
|
- !ruby/object:Gem::Version
|
111
|
-
version: '
|
111
|
+
version: '0.9'
|
112
112
|
description: ! " The Infochimps Platform is an end-to-end, managed solution for\n
|
113
113
|
\ building Big Data applications. It integrates best-of-breed\n technologies like
|
114
114
|
Hadoop, Storm, Kafka, MongoDB, ElasticSearch,\n HBase, &c. and provides simple
|
@@ -132,6 +132,7 @@ files:
|
|
132
132
|
- README.md
|
133
133
|
- Rakefile
|
134
134
|
- bin/wu-deploy
|
135
|
+
- examples/.gitkeep
|
135
136
|
- lib/wukong-deploy.rb
|
136
137
|
- lib/wukong-deploy/configuration.rb
|
137
138
|
- lib/wukong-deploy/console.rb
|
@@ -140,7 +141,13 @@ files:
|
|
140
141
|
- lib/wukong-deploy/repo.rb
|
141
142
|
- lib/wukong-deploy/tasks.rb
|
142
143
|
- lib/wukong-deploy/templater.rb
|
144
|
+
- lib/wukong-deploy/templater/conflict_resolution.rb
|
145
|
+
- lib/wukong-deploy/templater/differ.rb
|
146
|
+
- lib/wukong-deploy/templater/messaging.rb
|
143
147
|
- lib/wukong-deploy/version.rb
|
148
|
+
- spec/spec_helper.rb
|
149
|
+
- spec/support/integration_helper.rb
|
150
|
+
- spec/wukong-deploy/wu_deploy_spec.rb
|
144
151
|
- templates/Gemfile.erb
|
145
152
|
- templates/README.md.erb
|
146
153
|
- templates/Rakefile.erb
|
@@ -179,5 +186,8 @@ rubygems_version: 1.8.23
|
|
179
186
|
signing_key:
|
180
187
|
specification_version: 3
|
181
188
|
summary: Defines the deploy pack framework used by the Infochimps Platform
|
182
|
-
test_files:
|
189
|
+
test_files:
|
190
|
+
- spec/spec_helper.rb
|
191
|
+
- spec/support/integration_helper.rb
|
192
|
+
- spec/wukong-deploy/wu_deploy_spec.rb
|
183
193
|
has_rdoc:
|