wukong-deploy 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Gemfile +1 -1
- data/lib/wukong-deploy.rb +8 -1
- data/lib/wukong-deploy/deploy_pack.rb +52 -11
- data/lib/wukong-deploy/dump_extensions.rb +6 -0
- data/lib/wukong-deploy/extensions/ftp_syncer.rb +67 -0
- data/lib/wukong-deploy/extensions/prepare_syncer.rb +105 -0
- data/lib/wukong-deploy/extensions/s3_syncer.rb +59 -0
- data/lib/wukong-deploy/extensions/storm_invocation_override.rb +13 -0
- data/lib/wukong-deploy/extensions/uses_file_state.rb +17 -0
- data/lib/wukong-deploy/extensions/uses_lockfile.rb +17 -0
- data/lib/wukong-deploy/{extensions.rb → hadoop_extensions.rb} +0 -0
- data/lib/wukong-deploy/repo.rb +2 -0
- data/lib/wukong-deploy/storm_extensions.rb +2 -0
- data/lib/wukong-deploy/sync_extensions.rb +27 -0
- data/lib/wukong-deploy/tasks.rb +6 -1
- data/lib/wukong-deploy/version.rb +1 -1
- data/templates/Gemfile.erb +42 -4
- data/templates/README.md.erb +261 -35
- data/templates/config/boot.rb.erb +0 -11
- data/templates/config/initializers/event_machine.rb.erb +5 -0
- data/templates/gitignore +5 -0
- data/wukong-deploy.gemspec +2 -2
- metadata +18 -10
- data/bin/wu +0 -35
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/lib/wukong-deploy.rb
CHANGED
@@ -26,8 +26,15 @@ module Wukong
|
|
26
26
|
settings.define(:skip, :description => "Skip existing files", :type => :boolean, :default => false)
|
27
27
|
settings.define(:force, :description => "Overwrite existing files", :type => :boolean, :default => false)
|
28
28
|
when 'wu-hadoop'
|
29
|
-
require_relative('wukong-deploy/
|
29
|
+
require_relative('wukong-deploy/hadoop_extensions')
|
30
30
|
settings[:command_prefix] = 'bundle exec'
|
31
|
+
when 'wu-storm'
|
32
|
+
require_relative('wukong-deploy/storm_extensions')
|
33
|
+
settings[:command_prefix] = 'bundle exec'
|
34
|
+
when 'wu-dump'
|
35
|
+
require_relative('wukong-deploy/dump_extensions')
|
36
|
+
when 'wu-sync', 'wu-sync-all'
|
37
|
+
require_relative('wukong-deploy/sync_extensions')
|
31
38
|
end
|
32
39
|
end
|
33
40
|
|
@@ -1,5 +1,10 @@
|
|
1
1
|
module Wukong
|
2
2
|
module Deploy
|
3
|
+
|
4
|
+
# Return the name of this deploy pack.
|
5
|
+
def self.name
|
6
|
+
settings[:application]
|
7
|
+
end
|
3
8
|
|
4
9
|
# Return the root directory of this deploy pack.
|
5
10
|
#
|
@@ -58,11 +63,11 @@ module Wukong
|
|
58
63
|
@root
|
59
64
|
end
|
60
65
|
|
61
|
-
# The directory this deploy pack
|
66
|
+
# The directory this deploy pack puts all its application code in.
|
62
67
|
#
|
63
68
|
# @return [Pathname]
|
64
|
-
def self.
|
65
|
-
root.join('
|
69
|
+
def self.app_dir
|
70
|
+
root.join('app')
|
66
71
|
end
|
67
72
|
|
68
73
|
# The directory this deploy pack uses for configuration files.
|
@@ -71,12 +76,19 @@ module Wukong
|
|
71
76
|
def self.config_dir
|
72
77
|
root.join('config')
|
73
78
|
end
|
79
|
+
|
80
|
+
# The directory this deploy pack keeps local, sample data in.
|
81
|
+
#
|
82
|
+
# @return [Pathname]
|
83
|
+
def self.data_dir
|
84
|
+
root.join('data')
|
85
|
+
end
|
74
86
|
|
75
|
-
# The directory this deploy pack uses for
|
87
|
+
# The directory this deploy pack uses for lib files.
|
76
88
|
#
|
77
89
|
# @return [Pathname]
|
78
|
-
def self.
|
79
|
-
root.join('
|
90
|
+
def self.lib_dir
|
91
|
+
root.join('lib')
|
80
92
|
end
|
81
93
|
|
82
94
|
# The directory this deploy pack uses for logs.
|
@@ -86,11 +98,33 @@ module Wukong
|
|
86
98
|
root.join('log')
|
87
99
|
end
|
88
100
|
|
89
|
-
# The
|
101
|
+
# The directory this deploy pack puts all its scripts in.
|
90
102
|
#
|
91
103
|
# @return [Pathname]
|
92
|
-
def self.
|
93
|
-
root.join('
|
104
|
+
def self.script_dir
|
105
|
+
root.join('script')
|
106
|
+
end
|
107
|
+
|
108
|
+
# The directory this deploy pack puts all its Ruby unit tests in.
|
109
|
+
#
|
110
|
+
# @return [Pathname]
|
111
|
+
def self.spec_dir
|
112
|
+
root.join('spec')
|
113
|
+
end
|
114
|
+
|
115
|
+
# The directory this deploy pack puts all its non-Ruby source
|
116
|
+
# code.
|
117
|
+
#
|
118
|
+
# @return [Pathname]
|
119
|
+
def self.src_dir
|
120
|
+
root.join('src')
|
121
|
+
end
|
122
|
+
|
123
|
+
# The directory this deploy pack uses for temporary files.
|
124
|
+
#
|
125
|
+
# @return [Pathname]
|
126
|
+
def self.tmp_dir
|
127
|
+
root.join('tmp')
|
94
128
|
end
|
95
129
|
|
96
130
|
private
|
@@ -148,9 +182,16 @@ module Wukong
|
|
148
182
|
end
|
149
183
|
|
150
184
|
# Read remote settings.
|
151
|
-
#
|
152
|
-
# FIXME -- not implemented yet.
|
153
185
|
def self.read_remote_settings
|
186
|
+
return unless settings[:vcd]
|
187
|
+
if name.nil? || name.empty?
|
188
|
+
Wukong::Log.warn("Define the :application settings to give this deploy pack a name and be be able to read remote settings.")
|
189
|
+
return
|
190
|
+
end
|
191
|
+
topic = "deploy_packs.#{name}"
|
192
|
+
remote_settings = vayacondios_client.get(topic)
|
193
|
+
settings.merge(remote_settings) if remote_settings.success?
|
154
194
|
end
|
195
|
+
|
155
196
|
end
|
156
197
|
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Attaches to the hooks provided by the Wukong::Load::Syncer class
|
5
|
+
# to write data to Vayacondios.
|
6
|
+
module FTPSyncerOverride
|
7
|
+
|
8
|
+
# Saves the syncer as a stash in Vayacondios.
|
9
|
+
def before_sync
|
10
|
+
super()
|
11
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 'ftp', self)
|
12
|
+
end
|
13
|
+
|
14
|
+
# Announces a successful sync and updates the last sync state
|
15
|
+
# and time.
|
16
|
+
def after_sync
|
17
|
+
super()
|
18
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
19
|
+
success: true,
|
20
|
+
step: 'ftp',
|
21
|
+
files: self.files,
|
22
|
+
}.tap { |e| e[:duration] = duration if duration })
|
23
|
+
now = Time.now.utc.to_i
|
24
|
+
last_ftp_sync_summary = { state: 1, time: now}
|
25
|
+
last_ftp_sync_summary[:data] = now unless self.files.empty?
|
26
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "ftp.last", last_ftp_sync_summary)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Announces an error during a sync and updates the last sync
|
30
|
+
# state and time.
|
31
|
+
def on_error error
|
32
|
+
super(error)
|
33
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
34
|
+
success: false,
|
35
|
+
step: 'ftp',
|
36
|
+
error: error.class,
|
37
|
+
message: error.message,
|
38
|
+
files: self.files,
|
39
|
+
})
|
40
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "ftp.last", { state: 0, time: Time.now.utc.to_i })
|
41
|
+
end
|
42
|
+
|
43
|
+
# Returns the Vayacondios topic for this FTP syncer.
|
44
|
+
#
|
45
|
+
# @return [String] the Vayacondios topic
|
46
|
+
def vayacondios_topic
|
47
|
+
"listeners.#{name}"
|
48
|
+
end
|
49
|
+
|
50
|
+
# Returns a representation of this FTP syncer suitable for a
|
51
|
+
# Vayacondios stash.
|
52
|
+
#
|
53
|
+
# @return [Hash]
|
54
|
+
def to_vayacondios
|
55
|
+
{
|
56
|
+
protocol: settings[:protocol],
|
57
|
+
username: settings[:username],
|
58
|
+
password: (settings[:password] ? true : false),
|
59
|
+
host: settings[:host],
|
60
|
+
port: port,
|
61
|
+
path: settings[:path],
|
62
|
+
}
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Attaches to the hooks provided by the Wukong::Load::Syncer class
|
5
|
+
# to write data to Vayacondios.
|
6
|
+
module PrepareSyncerOverride
|
7
|
+
|
8
|
+
# Saves the syncer as a stash in Vayacondios.
|
9
|
+
def before_sync
|
10
|
+
super()
|
11
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 'prepare', self)
|
12
|
+
end
|
13
|
+
|
14
|
+
# Announces a successful sync and updates the last sync state
|
15
|
+
# and time.
|
16
|
+
def after_sync
|
17
|
+
super()
|
18
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
19
|
+
success: success?,
|
20
|
+
step: 'prepare',
|
21
|
+
counts: counts,
|
22
|
+
files: files,
|
23
|
+
}.tap { |e| e[:duration] = duration if duration })
|
24
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "prepare.last", { state: (success? ? 1 : 0), time: Time.now.utc.to_i })
|
25
|
+
end
|
26
|
+
|
27
|
+
# Announces an error during a sync and updates the last sync
|
28
|
+
# state and time.
|
29
|
+
def on_error error
|
30
|
+
super(error)
|
31
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
32
|
+
success: false,
|
33
|
+
step: 'prepare',
|
34
|
+
error: error.class,
|
35
|
+
message: error.message,
|
36
|
+
counts: counts,
|
37
|
+
files: files,
|
38
|
+
})
|
39
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "prepare.last", { state: 0, time: Time.now.utc.to_i })
|
40
|
+
end
|
41
|
+
|
42
|
+
# Returns the Vayacondios topic for this PrepareSyncer.
|
43
|
+
#
|
44
|
+
# @return [String] the Vayacondios topic
|
45
|
+
def vayacondios_topic
|
46
|
+
"listeners.#{name}"
|
47
|
+
end
|
48
|
+
|
49
|
+
# Returns a representation of this PrepareSyncer suitable for a
|
50
|
+
# Vayacondios stash.
|
51
|
+
#
|
52
|
+
# @return [Hash]
|
53
|
+
def to_vayacondios
|
54
|
+
{
|
55
|
+
split: settings[:split],
|
56
|
+
lines: settings[:lines],
|
57
|
+
bytes: settings[:bytes],
|
58
|
+
ordered: settings[:ordered],
|
59
|
+
metadata: settings[:metadata],
|
60
|
+
}
|
61
|
+
end
|
62
|
+
|
63
|
+
module HandlerOverride
|
64
|
+
|
65
|
+
# The topic for this Handler.
|
66
|
+
#
|
67
|
+
# Delegates to PrepareSyncer#vayacondios_topic.
|
68
|
+
#
|
69
|
+
# @return [String]
|
70
|
+
def vayacondios_topic
|
71
|
+
syncer.vayacondios_topic
|
72
|
+
end
|
73
|
+
|
74
|
+
# Announce the file was processed.
|
75
|
+
#
|
76
|
+
# @param [Pathname] original
|
77
|
+
def after_process original
|
78
|
+
super(original)
|
79
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
80
|
+
success: true,
|
81
|
+
step: 'prepare',
|
82
|
+
path: fragment_for(original),
|
83
|
+
size: File.size(original),
|
84
|
+
})
|
85
|
+
end
|
86
|
+
|
87
|
+
# Announce an error in processing a file.
|
88
|
+
#
|
89
|
+
# @param [Pathname] original
|
90
|
+
# @param [Error] error
|
91
|
+
def on_error original, error
|
92
|
+
super(original, error)
|
93
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
94
|
+
success: false,
|
95
|
+
step: 'prepare',
|
96
|
+
path: fragment_for(original),
|
97
|
+
error: error.class,
|
98
|
+
message: error.message
|
99
|
+
})
|
100
|
+
end
|
101
|
+
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Attaches to the hooks provided by the Wukong::Load::Syncer class
|
5
|
+
# to write data to Vayacondios.
|
6
|
+
module S3SyncerOverride
|
7
|
+
|
8
|
+
# Saves the syncer as a stash in Vayacondios.
|
9
|
+
def before_sync
|
10
|
+
super()
|
11
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 's3', self)
|
12
|
+
end
|
13
|
+
|
14
|
+
# Announces a successful sync and updates the last sync state
|
15
|
+
# and time.
|
16
|
+
def after_sync
|
17
|
+
super()
|
18
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
19
|
+
success: true,
|
20
|
+
step: 's3',
|
21
|
+
bytes: (bytes || 0),
|
22
|
+
}.tap { |e| e[:duration] = duration if duration })
|
23
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "s3.last", { state: 1, time: Time.now.utc.to_i })
|
24
|
+
end
|
25
|
+
|
26
|
+
# Announces an error during a sync and updates the last sync
|
27
|
+
# state and time.
|
28
|
+
def on_error error
|
29
|
+
super(error)
|
30
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
31
|
+
success: false,
|
32
|
+
step: 's3',
|
33
|
+
error: error.class,
|
34
|
+
message: error.message,
|
35
|
+
bytes: bytes,
|
36
|
+
})
|
37
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "s3.last", { state: 0, time: Time.now.utc.to_i })
|
38
|
+
end
|
39
|
+
|
40
|
+
# Returns the Vayacondios topic for this S3Syncer.
|
41
|
+
#
|
42
|
+
# @return [String] the Vayacondios topic
|
43
|
+
def vayacondios_topic
|
44
|
+
"listeners.#{name}"
|
45
|
+
end
|
46
|
+
|
47
|
+
# Returns a representation of this S3Syncer suitable for a
|
48
|
+
# Vayacondios stash.
|
49
|
+
#
|
50
|
+
# @return [Hash]
|
51
|
+
def to_vayacondios
|
52
|
+
{
|
53
|
+
bucket: s3_uri,
|
54
|
+
region: settings[:region],
|
55
|
+
}
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Makes `UsesFileState#file_state_dir` default to the
|
5
|
+
# `Wukong::Deploy.tmp_dir`.
|
6
|
+
module UsesFileStateOverride
|
7
|
+
|
8
|
+
# Overrides the default value of #file_state_dir with
|
9
|
+
# Wukong::Deploy.tmp_dir.
|
10
|
+
#
|
11
|
+
# @return [String]
|
12
|
+
def file_state_dir
|
13
|
+
Wukong::Deploy.tmp_dir.to_s
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Makes `UsesLockfile#lockfile_dir` default to the
|
5
|
+
# `Wukong::Deploy.tmp_dir`.
|
6
|
+
module UsesLockfileOverride
|
7
|
+
|
8
|
+
# Overrides the default value of #lockfile_dir with
|
9
|
+
# Wukong::Deploy.tmp_dir.
|
10
|
+
#
|
11
|
+
# @return [String]
|
12
|
+
def lockfile_dir
|
13
|
+
Wukong::Deploy.tmp_dir.to_s
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
File without changes
|
data/lib/wukong-deploy/repo.rb
CHANGED
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative('extensions/uses_lockfile')
|
2
|
+
require_relative('extensions/uses_file_state')
|
3
|
+
require_relative('extensions/ftp_syncer')
|
4
|
+
require_relative('extensions/prepare_syncer')
|
5
|
+
require_relative('extensions/s3_syncer')
|
6
|
+
|
7
|
+
Wukong::Load::SyncRunner.class_eval do
|
8
|
+
include Wukong::Deploy::UsesLockfileOverride
|
9
|
+
end
|
10
|
+
if defined?(Wukong::Load::SyncAllRunner)
|
11
|
+
Wukong::Load::SyncAllRunner.class_eval do
|
12
|
+
include Wukong::Deploy::UsesLockfileOverride
|
13
|
+
end
|
14
|
+
end
|
15
|
+
Wukong::Load::FTPSyncer.class_eval do
|
16
|
+
include Wukong::Deploy::FTPSyncerOverride
|
17
|
+
end
|
18
|
+
Wukong::Load::PrepareSyncer.class_eval do
|
19
|
+
include Wukong::Deploy::UsesFileStateOverride
|
20
|
+
include Wukong::Deploy::PrepareSyncerOverride
|
21
|
+
end
|
22
|
+
Wukong::Load::PrepareSyncer::Handler.class_eval do
|
23
|
+
include Wukong::Deploy::PrepareSyncerOverride::HandlerOverride
|
24
|
+
end
|
25
|
+
Wukong::Load::S3Syncer.class_eval do
|
26
|
+
include Wukong::Deploy::S3SyncerOverride
|
27
|
+
end
|
data/lib/wukong-deploy/tasks.rb
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
require 'rake'
|
2
2
|
|
3
|
+
require 'wukong/rake_helper'
|
4
|
+
|
3
5
|
require 'yard'
|
4
|
-
YARD::Rake::YardocTask.new(:doc)
|
6
|
+
YARD::Rake::YardocTask.new(:doc) do |t|
|
7
|
+
t.files = %w[app/**/*.rb Rakefile Gemfile]
|
8
|
+
t.options = ['--load', Wukong.doc_helpers_path]
|
9
|
+
end
|
5
10
|
|
6
11
|
require 'rspec/core/rake_task'
|
7
12
|
RSpec::Core::RakeTask.new(:spec)
|
data/templates/Gemfile.erb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
# Use the public [RubyGems server](
|
2
|
-
source
|
1
|
+
# Use the public [RubyGems server](https://rubygems.org/)
|
2
|
+
source 'https://rubygems.org'
|
3
3
|
|
4
4
|
# = Application
|
5
5
|
#
|
@@ -12,7 +12,7 @@ source :rubygems
|
|
12
12
|
#
|
13
13
|
# here and then remember to run
|
14
14
|
#
|
15
|
-
# $ bundle install
|
15
|
+
# $ bundle install
|
16
16
|
#
|
17
17
|
# Here's a link to the [Bundler documentation](http://gembundler.com/)
|
18
18
|
# for your convenience.
|
@@ -46,6 +46,7 @@ gem 'wonderdog', '0.1.0'
|
|
46
46
|
# Provides
|
47
47
|
#
|
48
48
|
# - wu-storm
|
49
|
+
# - wu-bolt
|
49
50
|
gem 'wukong-storm', '0.1.0'
|
50
51
|
|
51
52
|
# Lets you load JSON blobs into data stores.
|
@@ -56,13 +57,50 @@ gem 'wukong-storm', '0.1.0'
|
|
56
57
|
# - wu-source
|
57
58
|
gem 'wukong-load', '0.1.0'
|
58
59
|
|
59
|
-
#
|
60
|
+
# If you want to use wu-load with certain databases you may have to
|
61
|
+
# enable the various gems required for the database(s) you're
|
62
|
+
# interested in here.
|
63
|
+
group :load do
|
64
|
+
|
65
|
+
#
|
66
|
+
# Already required by one or more Wukong plugins:
|
67
|
+
#
|
68
|
+
# - Kafka
|
69
|
+
#
|
70
|
+
# Not required because uses Ruby standard library to connect:
|
71
|
+
#
|
72
|
+
# - ElasticSearch
|
73
|
+
#
|
74
|
+
|
75
|
+
# Enable MongoDB
|
76
|
+
#
|
77
|
+
# gem 'mongo'
|
78
|
+
# gem 'bson_ext'
|
79
|
+
|
80
|
+
# Enable MySQL
|
81
|
+
#
|
82
|
+
# gem 'mysql2'
|
83
|
+
end
|
84
|
+
|
85
|
+
# Framework for gluing Wukong plugins together into a deploy pack.
|
60
86
|
#
|
61
87
|
# Provides
|
62
88
|
#
|
63
89
|
# - wu-deploy
|
64
90
|
gem 'wukong-deploy', '0.1.0'
|
65
91
|
|
92
|
+
# Provides metadata about a deploy pack.
|
93
|
+
#
|
94
|
+
# Provides
|
95
|
+
#
|
96
|
+
# - wu-meta
|
97
|
+
#
|
98
|
+
gem 'wukong-meta', '0.1.0'
|
99
|
+
|
100
|
+
# Integrates Wukong processors with Vayacondios.
|
101
|
+
#
|
102
|
+
gem 'wukong-vayacondios', '0.1.0'
|
103
|
+
|
66
104
|
group :development do
|
67
105
|
gem 'yard'
|
68
106
|
gem 'redcarpet'
|
data/templates/README.md.erb
CHANGED
@@ -94,55 +94,84 @@ want to run `rbenv exec bundle install`.
|
|
94
94
|
|
95
95
|
### Configuration
|
96
96
|
|
97
|
-
|
98
|
-
you begin to extend it you may add functionality which benefits from
|
99
|
-
the ability to be configured.
|
97
|
+
#### Configuring the Environment
|
100
98
|
|
101
|
-
|
102
|
-
|
103
|
-
|
99
|
+
Before any of the `wu` programs can run, the Ruby process must first
|
100
|
+
boot up, require Wukong and all necessary dependencies (such as
|
101
|
+
'event-machine') and plugins (such as the deploy pack plugin
|
102
|
+
`wukong-deploy`), and then hand over control to the `wu` program.
|
104
103
|
|
105
|
-
|
104
|
+
The following Ruby files are loaded in order. Each file is
|
105
|
+
responsible for configuring some part of this runtime environment:
|
106
|
+
|
107
|
+
1. `config/environment` -- requires the rest of the files and adds any additional environmental code
|
108
|
+
2. `config/application` -- defines the load order of external libraries, Wukong plugins, and application code
|
109
|
+
3. `config/boot` -- defines how and where the Ruby process will look for code dependencies (through Bundler)
|
110
|
+
4. `config/initializers/*.rb` -- non-Wukong configuration for external libraries or application code can live here
|
111
|
+
|
112
|
+
#### Configuring the Application
|
113
|
+
|
114
|
+
The application a given deploy pack is running can be configured at
|
115
|
+
several different layers.
|
116
|
+
|
117
|
+
The simplest layer is settings passed to `wu` programs on the
|
118
|
+
command-line. These settings have the highest precedence and will
|
119
|
+
always be read.
|
120
|
+
|
121
|
+
When booting any of the `wu` tools the deploy pack will also read and
|
122
|
+
merge settings from the following configuration files, in order of
|
123
|
+
**increasing** precedence:
|
124
|
+
|
125
|
+
1. `config/settings.yml`
|
126
|
+
2. `config/settings/*.yml` if present, without any guarantee as to order
|
127
|
+
3. `config/environments/[environment].yml`
|
128
|
+
4. `config/environments/[environment]/*.yml` if present, without any guarantee as to order
|
129
|
+
5. `config/deploy.yml` if present (this file should be ignored by version control)
|
130
|
+
6. `config/environments/deploy-[environment].yml` if present (this file should be ignored by version control)
|
131
|
+
|
132
|
+
Finally, if interaction with Vayacondios is turned on, settings will
|
133
|
+
also be read from a Vayacondios stash (see the <a
|
134
|
+
href="#vayacondios">Vayacondios section</a> below).
|
135
|
+
|
136
|
+
Completely merged and resolved configuration settings are accessible
|
137
|
+
globally (once the Wukong framework has been booted) via the
|
138
|
+
`Wukong::Deploy.settings` object. Any piece of code in a model,
|
139
|
+
processor, dataflow, or elsewhere can read and write to this object.
|
140
|
+
|
141
|
+
Processors will *automatically* read settings for their fields from a
|
142
|
+
subhash within this global settings object. Given a processor like
|
106
143
|
|
107
144
|
```ruby
|
108
|
-
Wukong.
|
109
|
-
field :
|
110
|
-
def process
|
111
|
-
|
145
|
+
Wukong.processor(:tokenizer) do
|
146
|
+
field :min_length, Integer, default: 2
|
147
|
+
def process line
|
148
|
+
...
|
112
149
|
end
|
113
150
|
end
|
114
151
|
```
|
115
152
|
|
116
|
-
|
117
|
-
|
118
|
-
```
|
119
|
-
$ cat input
|
120
|
-
1
|
121
|
-
2
|
122
|
-
3
|
123
|
-
$ cat input | wu-local configurable_decorator
|
124
|
-
1.
|
125
|
-
2.
|
126
|
-
3.
|
127
|
-
$ cat input | wu-local configurable_decorator --suffix=','
|
128
|
-
1,
|
129
|
-
2,
|
130
|
-
3,
|
131
|
-
|
132
|
-
You can also set the same property in a configuration file, scoped by
|
133
|
-
the name of the processor:
|
153
|
+
you can override the value of its `min_length` field by putting
|
154
|
+
the following section into any one of the configuration files above:
|
134
155
|
|
135
156
|
```yaml
|
136
|
-
# in config/settings.yml
|
137
157
|
---
|
158
|
+
# in config/settings.yml, for example
|
138
159
|
|
139
|
-
|
140
|
-
|
160
|
+
tokenizer:
|
161
|
+
min_length: 5
|
141
162
|
```
|
142
163
|
|
143
|
-
which
|
144
|
-
|
145
|
-
|
164
|
+
which would now make the command
|
165
|
+
|
166
|
+
```
|
167
|
+
$ cat corpus.txt | wu local tokenizer
|
168
|
+
```
|
169
|
+
|
170
|
+
have the same effect as
|
171
|
+
|
172
|
+
```
|
173
|
+
$ cat corpus.txt | wu local tokenizer --min_length=5
|
174
|
+
```
|
146
175
|
|
147
176
|
## File Structure
|
148
177
|
|
@@ -206,3 +235,200 @@ the underlying documentation for Wukong and its plugins, specifically:
|
|
206
235
|
|
207
236
|
* on [Wukong](http://github.com/infochimps-labs/wukong/tree/3.0.0) so you understand the basic idea of a processor and how to glue processors together
|
208
237
|
* on [Wukong-Hadoop](http://github.com/infochimps-labs/wukong-hadoop) so you understand how to move between local and Hadoop modes for batch analytics
|
238
|
+
|
239
|
+
|
240
|
+
<a name="vayacondios"></a>
|
241
|
+
## Interacting with Vayacondios
|
242
|
+
|
243
|
+
[Vayacondios](http://github.com/infochimps-labs/vayacondios) is a
|
244
|
+
program which makes it easy for clients to announce events or read
|
245
|
+
and write settings to and from a central server.
|
246
|
+
|
247
|
+
The basic objects of Vayacondios are the **stash** and the **event**:
|
248
|
+
|
249
|
+
* a **stash** is an "object", a "configuration", or "setting" designed to be shared among many services
|
250
|
+
* an **event** is a "fact", "measurement", or "metric" announced by an arbitrary service, possibly related to some stash
|
251
|
+
|
252
|
+
Stashes and events are organized in two levels.
|
253
|
+
|
254
|
+
The top-level is the **organization**. Data from multiple
|
255
|
+
organizations is stored together but accessed separately by a running
|
256
|
+
Vayacondios server. An organization could be the name of a user,
|
257
|
+
workgroup, application, or service using Vayacondios.
|
258
|
+
|
259
|
+
The next level is the **topic**. Each topic within Vayacondios has a
|
260
|
+
single stash and can have multiple events. An "object" like a server,
|
261
|
+
a database, an application, a service, or a user maps to the concept
|
262
|
+
of "topic".
|
263
|
+
|
264
|
+
Every `wu` tool running within a deploy pack takes an additional
|
265
|
+
option `--vcd` which turns on or off interactions with Vayacondios.
|
266
|
+
This option can be specified at runtime on the command-line as well as
|
267
|
+
via a configuration file. When not running "in Vayacondios mode"
|
268
|
+
(when `--vcd` was not passed), interactions with Vayacondios will be
|
269
|
+
logged instead of transmitted and received.
|
270
|
+
|
271
|
+
### Configuring Vayacondios access
|
272
|
+
|
273
|
+
If you don't intend to interact with a Vayacondios server, you can
|
274
|
+
just set `vcd` to `false` for your whole environment and skip this
|
275
|
+
section (as is done, for example, in the `test` environment by
|
276
|
+
default).
|
277
|
+
|
278
|
+
If you intend to interact with Vayacondios then you need to also
|
279
|
+
specify the `vcd_host` and `vcd_port` options which otherwise default
|
280
|
+
to the usual Vayacondios server port running on localhost.
|
281
|
+
|
282
|
+
```yaml
|
283
|
+
---
|
284
|
+
# in config/environments/production.yml
|
285
|
+
vcd_host: 10.123.123.123
|
286
|
+
vcd_port: 9000
|
287
|
+
```
|
288
|
+
|
289
|
+
Vayacondios also requires that all events and stashes are stored under
|
290
|
+
a given organization name. The Vayacondios organization, which will
|
291
|
+
likely be shared across all environments of your application, is
|
292
|
+
usually set at the top-level:
|
293
|
+
|
294
|
+
```yaml
|
295
|
+
---
|
296
|
+
# in config/settings.yml
|
297
|
+
organization: my_company
|
298
|
+
```
|
299
|
+
|
300
|
+
### Handle out of band event data with Events
|
301
|
+
|
302
|
+
Despite being designed to be powerful and scalable, Vayacondios is not
|
303
|
+
the appropriate store for high-volume, high-throughput,
|
304
|
+
mission-critical data which must be persisted over the long-term.
|
305
|
+
Instead it should be used for "out of band" data, which is typically
|
306
|
+
much smaller in volume and throughput than the main body of a
|
307
|
+
dataflow. Examples of such out of band events include:
|
308
|
+
|
309
|
+
* signalling some intermittent or runtime error
|
310
|
+
* warning that some event was bad or suspicious
|
311
|
+
* logging an error
|
312
|
+
* registering some periodic metric
|
313
|
+
* signaling a change in state
|
314
|
+
|
315
|
+
Announcements can be made from anywhere within the Wukong framework by
|
316
|
+
accessing the `Wukong::Deploy.vayacondios_client` object but the most
|
317
|
+
common approach is to announce events within a processor or within a
|
318
|
+
dataflow.
|
319
|
+
|
320
|
+
#### Announcing from a processor
|
321
|
+
|
322
|
+
The `Wukong::Processor#announce` method can be used to directly send
|
323
|
+
an event to Vayacondios on a given topic.
|
324
|
+
|
325
|
+
```ruby
|
326
|
+
Wukong.processor(:parser) do
|
327
|
+
def process line
|
328
|
+
yield parse!(line)
|
329
|
+
rescue ParseError => e
|
330
|
+
announce "parser.errors", line: line
|
331
|
+
end
|
332
|
+
end
|
333
|
+
```
|
334
|
+
|
335
|
+
It's important when setting up an announcement like this that you
|
336
|
+
consider how often this piece of code will actually send events to
|
337
|
+
Vayacondios. If a `ParseError` is triggered once in every 10,000
|
338
|
+
lines, this may be perfectly fine to be running in production. If 1
|
339
|
+
in 10 lines causes a similar error, this may not be the right
|
340
|
+
approach.
|
341
|
+
|
342
|
+
#### Announcing from a dataflow
|
343
|
+
|
344
|
+
The `announce` processor can be used to announce all incoming
|
345
|
+
events to Vayacondios. Here's an example flow which makes use of it:
|
346
|
+
|
347
|
+
```ruby
|
348
|
+
Wukong.dataflow(:parse_source) do
|
349
|
+
parser |
|
350
|
+
[
|
351
|
+
select(&:valid?) | ... | to_json,
|
352
|
+
select(&:invalid?) | announce(topic: "invalid_records")
|
353
|
+
]
|
354
|
+
end
|
355
|
+
```
|
356
|
+
|
357
|
+
Just as in the above example with a processor, it's important that the
|
358
|
+
flow through the announce processor is not incredibly high-volume.
|
359
|
+
|
360
|
+
The `announce` processor is terminal; it yields no output records.
|
361
|
+
|
362
|
+
### Allow dynamic configuration with Stashes
|
363
|
+
|
364
|
+
The deploy pack inside a backend system like Hadoop or Storm can fetch
|
365
|
+
stashes from Vayacondios during runtime. Other systems external to
|
366
|
+
the deploy pack can simultaneously be writing data into these same
|
367
|
+
stashes in Vayacondios, allowing for a lightweight, two-way
|
368
|
+
communication stream between the deploy pack and arbitrary external
|
369
|
+
resources, mediated by a key-value store (the Vayacondios stash).
|
370
|
+
|
371
|
+
Stashes can be read and written from anywhere within the Wukong
|
372
|
+
framework by accessing the `Wukong::Deploy.vayacondios_client` object
|
373
|
+
but there are two special places where encapsulated, remote settings
|
374
|
+
are very useful.
|
375
|
+
|
376
|
+
#### Dynamic settings for the deploy pack itself
|
377
|
+
|
378
|
+
Each deploy pack, as an application, can fetch a stash of settings
|
379
|
+
from Vayacondios and use this at bootup time in the same way it uses a
|
380
|
+
configuration file on disk.  All that is required is a Vayacondios
|
381
|
+
stash topic name. This is furnished by providing to the deploy pack
|
382
|
+
an `application` name in a configuration file, usually the top-level
|
383
|
+
one:
|
384
|
+
|
385
|
+
```yaml
|
386
|
+
---
|
387
|
+
# in config/settings.yml, for example
|
388
|
+
|
389
|
+
application: my_app
|
390
|
+
```
|
391
|
+
|
392
|
+
When any `wu` tool is launched within the deploy pack with the `--vcd`
|
393
|
+
option (possibly set at an environment-wide level via a configuration
|
394
|
+
file) then remote settings from Vayacondios for the `application` will
|
395
|
+
be pulled at boot-time and merged into the local settings from
|
396
|
+
configuration files and the command-line.
|
397
|
+
|
398
|
+
#### Dynamic settings for processors
|
399
|
+
|
400
|
+
The processor `tokenizer` in the deploy pack with application name
|
401
|
+
`my_app` defaults to using the stash with topic
|
402
|
+
`processors.my_app-tokenizer` in Vayacondios to store its settings
|
403
|
+
(this can be changed by overriding the `Wukong::Processor#vcd_topic`
|
404
|
+
method).
|
405
|
+
|
406
|
+
These settings, if they exist, can be retrieved and merged into the
|
407
|
+
processor's current fields at any time using the
|
408
|
+
`Wukong::Processor#update_settings` method.  A common use case is to want to
|
409
|
+
update a processor's fields every 30 seconds, or similar. This is
|
410
|
+
most easily accomplished via the
|
411
|
+
`Wukong::Processor#update_settings_every` method.  Here's an example:
|
412
|
+
|
413
|
+
```ruby
|
414
|
+
Wukong.processor(:tagger) do
|
415
|
+
field :tags, Array, doc: "List of tags to check", default: []
|
416
|
+
|
417
|
+
def setup
|
418
|
+
update_settings_every(30)
|
419
|
+
end
|
420
|
+
|
421
|
+
def process record
|
422
|
+
tags.each do |tag|
|
423
|
+
...
|
424
|
+
end
|
425
|
+
end
|
426
|
+
end
|
427
|
+
```
|
428
|
+
|
429
|
+
The `tags` field of this processor will be updated every 30 seconds
|
430
|
+
with the latest values from Vayacondios.
|
431
|
+
|
432
|
+
The `Wukong::Processor#save_settings` and
|
433
|
+
`Wukong::Processor#save_settings_every` methods can be used to
|
434
|
+
save settings from a processor **to** Vayacondios.
|
@@ -1,17 +1,6 @@
|
|
1
1
|
# = config/boot.rb
|
2
2
|
#
|
3
3
|
# Loads the Ruby language framework and activates Bundler.
|
4
|
-
#
|
5
|
-
# We can use a system-wide bundle installation _or_ a standalone
|
6
|
-
# bundle installation. If a standalone installation exists (at
|
7
|
-
# /bundle/bundler/setup.rb), we will detect and use it. Otherwise, we
|
8
|
-
# fall back on the system-wide bundle installation.
|
9
|
-
#
|
10
|
-
# Make sure you install the bundle locally with the following command:
|
11
|
-
#
|
12
|
-
# @example Install a standalone bundle of gems
|
13
|
-
#
|
14
|
-
# $ bundle install --standalone
|
15
4
|
|
16
5
|
local_setup_path = File.expand_path("../../bundle/bundler/setup.rb", __FILE__)
|
17
6
|
if File.exist?(local_setup_path)
|
@@ -0,0 +1,5 @@
|
|
1
|
+
# EventMachine hard-codes a very inconvenient maximum length for a
|
2
|
+
# line -- 16K. This is too small for many use cases and is enlarged
|
3
|
+
# here to 16M.
|
4
|
+
EventMachine::Protocols::LineAndTextProtocol.send(:remove_const, :MaxLineLength)
|
5
|
+
EventMachine::Protocols::LineAndTextProtocol.send(:const_set, :MaxLineLength, 16*1024*1024)
|
data/templates/gitignore
CHANGED
data/wukong-deploy.gemspec
CHANGED
@@ -31,11 +31,11 @@ Gem::Specification.new do |gem|
|
|
31
31
|
EOF
|
32
32
|
|
33
33
|
gem.files = `git ls-files`.split("\n")
|
34
|
-
gem.executables = ['wu-deploy'
|
34
|
+
gem.executables = ['wu-deploy']
|
35
35
|
gem.test_files = gem.files.grep(/^spec/)
|
36
36
|
gem.require_paths = ['lib']
|
37
37
|
|
38
|
-
gem.add_dependency('wukong', '
|
38
|
+
gem.add_dependency('wukong', '4.0.0')
|
39
39
|
gem.add_dependency('rake', '~> 0.9')
|
40
40
|
gem.add_dependency('erubis')
|
41
41
|
gem.add_dependency('diffy')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong-deploy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2014-03-19 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: wukong
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - '='
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: 4.0.0
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
26
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -28,7 +28,7 @@ dependencies:
|
|
28
28
|
requirements:
|
29
29
|
- - '='
|
30
30
|
- !ruby/object:Gem::Version
|
31
|
-
version:
|
31
|
+
version: 4.0.0
|
32
32
|
- !ruby/object:Gem::Dependency
|
33
33
|
name: rake
|
34
34
|
requirement: !ruby/object:Gem::Requirement
|
@@ -91,7 +91,6 @@ description: ! " The Infochimps Platform is an end-to-end, managed solution for
|
|
91
91
|
email: coders@infochimps.com
|
92
92
|
executables:
|
93
93
|
- wu-deploy
|
94
|
-
- wu
|
95
94
|
extensions: []
|
96
95
|
extra_rdoc_files: []
|
97
96
|
files:
|
@@ -102,17 +101,25 @@ files:
|
|
102
101
|
- LICENSE.md
|
103
102
|
- README.md
|
104
103
|
- Rakefile
|
105
|
-
- bin/wu
|
106
104
|
- bin/wu-deploy
|
107
105
|
- examples/.gitkeep
|
108
106
|
- lib/wukong-deploy.rb
|
109
107
|
- lib/wukong-deploy/console.rb
|
110
108
|
- lib/wukong-deploy/deploy_pack.rb
|
111
109
|
- lib/wukong-deploy/deploy_runner.rb
|
112
|
-
- lib/wukong-deploy/
|
110
|
+
- lib/wukong-deploy/dump_extensions.rb
|
111
|
+
- lib/wukong-deploy/extensions/ftp_syncer.rb
|
113
112
|
- lib/wukong-deploy/extensions/hadoop_invocation_override.rb
|
113
|
+
- lib/wukong-deploy/extensions/prepare_syncer.rb
|
114
|
+
- lib/wukong-deploy/extensions/s3_syncer.rb
|
115
|
+
- lib/wukong-deploy/extensions/storm_invocation_override.rb
|
116
|
+
- lib/wukong-deploy/extensions/uses_file_state.rb
|
117
|
+
- lib/wukong-deploy/extensions/uses_lockfile.rb
|
118
|
+
- lib/wukong-deploy/hadoop_extensions.rb
|
114
119
|
- lib/wukong-deploy/rake_runner.rb
|
115
120
|
- lib/wukong-deploy/repo.rb
|
121
|
+
- lib/wukong-deploy/storm_extensions.rb
|
122
|
+
- lib/wukong-deploy/sync_extensions.rb
|
116
123
|
- lib/wukong-deploy/tasks.rb
|
117
124
|
- lib/wukong-deploy/templater.rb
|
118
125
|
- lib/wukong-deploy/templater/conflict_resolution.rb
|
@@ -131,6 +138,7 @@ files:
|
|
131
138
|
- templates/config/environments/development.yml.erb
|
132
139
|
- templates/config/environments/production.yml.erb
|
133
140
|
- templates/config/environments/test.yml.erb
|
141
|
+
- templates/config/initializers/event_machine.rb.erb
|
134
142
|
- templates/config/settings.yml.erb
|
135
143
|
- templates/gitignore
|
136
144
|
- templates/spec/spec_helper.rb.erb
|
@@ -150,7 +158,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
150
158
|
version: '0'
|
151
159
|
segments:
|
152
160
|
- 0
|
153
|
-
hash:
|
161
|
+
hash: -3128770076158943322
|
154
162
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
155
163
|
none: false
|
156
164
|
requirements:
|
@@ -159,10 +167,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
159
167
|
version: '0'
|
160
168
|
segments:
|
161
169
|
- 0
|
162
|
-
hash:
|
170
|
+
hash: -3128770076158943322
|
163
171
|
requirements: []
|
164
172
|
rubyforge_project:
|
165
|
-
rubygems_version: 1.8.
|
173
|
+
rubygems_version: 1.8.23
|
166
174
|
signing_key:
|
167
175
|
specification_version: 3
|
168
176
|
summary: Defines the deploy pack framework used by the Infochimps Platform
|
data/bin/wu
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'shellwords'
|
3
|
-
now=Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
4
|
-
if ARGV.empty?
|
5
|
-
$stderr.puts "ERROR #{now} [wu ] -- Must provide a Wukong command to run. Try the --help option."
|
6
|
-
else
|
7
|
-
if ARGV.size == 1 && ARGV.first == '--help'
|
8
|
-
$stderr.puts <<EOF
|
9
|
-
usage: wu COMMAND [OPTIONS] [ARG] ...
|
10
|
-
|
11
|
-
wu is a wrapper for easy use of Wukong's command-line tools. It takes
|
12
|
-
your arguments, constructs the name of the proper wu-tool to call, and
|
13
|
-
prepends a call to bundle exec.
|
14
|
-
|
15
|
-
$ wu local ...
|
16
|
-
|
17
|
-
is equivalent to
|
18
|
-
|
19
|
-
$ bundle exec wu-local ...
|
20
|
-
|
21
|
-
You can run any of the wu-tools like
|
22
|
-
|
23
|
-
wu-local
|
24
|
-
wu-hadoop
|
25
|
-
wu-deploy
|
26
|
-
|
27
|
-
EOF
|
28
|
-
else
|
29
|
-
if ARGV.first =~ /^-/
|
30
|
-
$stderr.puts "ERROR ${now} [wu ] -- First argument must be the name of a wu tool to run, got <${1}>"
|
31
|
-
else
|
32
|
-
Kernel.exec "bundle exec wu-#{Shellwords.join(ARGV)}"
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|