wukong-deploy 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Gemfile +1 -1
- data/lib/wukong-deploy.rb +8 -1
- data/lib/wukong-deploy/deploy_pack.rb +52 -11
- data/lib/wukong-deploy/dump_extensions.rb +6 -0
- data/lib/wukong-deploy/extensions/ftp_syncer.rb +67 -0
- data/lib/wukong-deploy/extensions/prepare_syncer.rb +105 -0
- data/lib/wukong-deploy/extensions/s3_syncer.rb +59 -0
- data/lib/wukong-deploy/extensions/storm_invocation_override.rb +13 -0
- data/lib/wukong-deploy/extensions/uses_file_state.rb +17 -0
- data/lib/wukong-deploy/extensions/uses_lockfile.rb +17 -0
- data/lib/wukong-deploy/{extensions.rb → hadoop_extensions.rb} +0 -0
- data/lib/wukong-deploy/repo.rb +2 -0
- data/lib/wukong-deploy/storm_extensions.rb +2 -0
- data/lib/wukong-deploy/sync_extensions.rb +27 -0
- data/lib/wukong-deploy/tasks.rb +6 -1
- data/lib/wukong-deploy/version.rb +1 -1
- data/templates/Gemfile.erb +42 -4
- data/templates/README.md.erb +261 -35
- data/templates/config/boot.rb.erb +0 -11
- data/templates/config/initializers/event_machine.rb.erb +5 -0
- data/templates/gitignore +5 -0
- data/wukong-deploy.gemspec +2 -2
- metadata +18 -10
- data/bin/wu +0 -35
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/lib/wukong-deploy.rb
CHANGED
@@ -26,8 +26,15 @@ module Wukong
|
|
26
26
|
settings.define(:skip, :description => "Skip existing files", :type => :boolean, :default => false)
|
27
27
|
settings.define(:force, :description => "Overwrite existing files", :type => :boolean, :default => false)
|
28
28
|
when 'wu-hadoop'
|
29
|
-
require_relative('wukong-deploy/
|
29
|
+
require_relative('wukong-deploy/hadoop_extensions')
|
30
30
|
settings[:command_prefix] = 'bundle exec'
|
31
|
+
when 'wu-storm'
|
32
|
+
require_relative('wukong-deploy/storm_extensions')
|
33
|
+
settings[:command_prefix] = 'bundle exec'
|
34
|
+
when 'wu-dump'
|
35
|
+
require_relative('wukong-deploy/dump_extensions')
|
36
|
+
when 'wu-sync', 'wu-sync-all'
|
37
|
+
require_relative('wukong-deploy/sync_extensions')
|
31
38
|
end
|
32
39
|
end
|
33
40
|
|
@@ -1,5 +1,10 @@
|
|
1
1
|
module Wukong
|
2
2
|
module Deploy
|
3
|
+
|
4
|
+
# Return the name of this deploy pack.
|
5
|
+
def self.name
|
6
|
+
settings[:application]
|
7
|
+
end
|
3
8
|
|
4
9
|
# Return the root directory of this deploy pack.
|
5
10
|
#
|
@@ -58,11 +63,11 @@ module Wukong
|
|
58
63
|
@root
|
59
64
|
end
|
60
65
|
|
61
|
-
# The directory this deploy pack
|
66
|
+
# The directory this deploy pack puts all its application code in.
|
62
67
|
#
|
63
68
|
# @return [Pathname]
|
64
|
-
def self.
|
65
|
-
root.join('
|
69
|
+
def self.app_dir
|
70
|
+
root.join('app')
|
66
71
|
end
|
67
72
|
|
68
73
|
# The directory this deploy pack uses for configuration files.
|
@@ -71,12 +76,19 @@ module Wukong
|
|
71
76
|
def self.config_dir
|
72
77
|
root.join('config')
|
73
78
|
end
|
79
|
+
|
80
|
+
# The directory this deploy pack keeps local, sample data in.
|
81
|
+
#
|
82
|
+
# @return [Pathname]
|
83
|
+
def self.data_dir
|
84
|
+
root.join('data')
|
85
|
+
end
|
74
86
|
|
75
|
-
# The directory this deploy pack uses for
|
87
|
+
# The directory this deploy pack uses for lib files.
|
76
88
|
#
|
77
89
|
# @return [Pathname]
|
78
|
-
def self.
|
79
|
-
root.join('
|
90
|
+
def self.lib_dir
|
91
|
+
root.join('lib')
|
80
92
|
end
|
81
93
|
|
82
94
|
# The directory this deploy pack uses for logs.
|
@@ -86,11 +98,33 @@ module Wukong
|
|
86
98
|
root.join('log')
|
87
99
|
end
|
88
100
|
|
89
|
-
# The
|
101
|
+
# The directory this deploy pack puts all its scripts in.
|
90
102
|
#
|
91
103
|
# @return [Pathname]
|
92
|
-
def self.
|
93
|
-
root.join('
|
104
|
+
def self.script_dir
|
105
|
+
root.join('script')
|
106
|
+
end
|
107
|
+
|
108
|
+
# The directory this deploy pack puts all its Ruby unit tests in.
|
109
|
+
#
|
110
|
+
# @return [Pathname]
|
111
|
+
def self.spec_dir
|
112
|
+
root.join('spec')
|
113
|
+
end
|
114
|
+
|
115
|
+
# The directory this deploy pack puts all its non-Ruby source
|
116
|
+
# code in.
|
117
|
+
#
|
118
|
+
# @return [Pathname]
|
119
|
+
def self.src_dir
|
120
|
+
root.join('src')
|
121
|
+
end
|
122
|
+
|
123
|
+
# The directory this deploy pack uses for temporary files.
|
124
|
+
#
|
125
|
+
# @return [Pathname]
|
126
|
+
def self.tmp_dir
|
127
|
+
root.join('tmp')
|
94
128
|
end
|
95
129
|
|
96
130
|
private
|
@@ -148,9 +182,16 @@ module Wukong
|
|
148
182
|
end
|
149
183
|
|
150
184
|
# Read remote settings.
|
151
|
-
#
|
152
|
-
# FIXME -- not implemented yet.
|
153
185
|
def self.read_remote_settings
|
186
|
+
return unless settings[:vcd]
|
187
|
+
if name.nil? || name.empty?
|
188
|
+
Wukong::Log.warn("Define the :application settings to give this deploy pack a name and be be able to read remote settings.")
|
189
|
+
return
|
190
|
+
end
|
191
|
+
topic = "deploy_packs.#{name}"
|
192
|
+
remote_settings = vayacondios_client.get(topic)
|
193
|
+
settings.merge(remote_settings) if remote_settings.success?
|
154
194
|
end
|
195
|
+
|
155
196
|
end
|
156
197
|
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Attaches to the hooks provided by the Wukong::Load::Syncer class
|
5
|
+
# to write data to Vayacondios.
|
6
|
+
module FTPSyncerOverride
|
7
|
+
|
8
|
+
# Saves the syncer as a stash in Vayacondios.
|
9
|
+
def before_sync
|
10
|
+
super()
|
11
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 'ftp', self)
|
12
|
+
end
|
13
|
+
|
14
|
+
# Announces a successful sync and updates the last sync state
|
15
|
+
# and time.
|
16
|
+
def after_sync
|
17
|
+
super()
|
18
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
19
|
+
success: true,
|
20
|
+
step: 'ftp',
|
21
|
+
files: self.files,
|
22
|
+
}.tap { |e| e[:duration] = duration if duration })
|
23
|
+
now = Time.now.utc.to_i
|
24
|
+
last_ftp_sync_summary = { state: 1, time: now}
|
25
|
+
last_ftp_sync_summary[:data] = now unless self.files.empty?
|
26
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "ftp.last", last_ftp_sync_summary)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Announces an error during a sync and updates the last sync
|
30
|
+
# state and time.
|
31
|
+
def on_error error
|
32
|
+
super(error)
|
33
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
34
|
+
success: false,
|
35
|
+
step: 'ftp',
|
36
|
+
error: error.class,
|
37
|
+
message: error.message,
|
38
|
+
files: self.files,
|
39
|
+
})
|
40
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "ftp.last", { state: 0, time: Time.now.utc.to_i })
|
41
|
+
end
|
42
|
+
|
43
|
+
# Returns the Vayacondios topic for this FTP syncer.
|
44
|
+
#
|
45
|
+
# @return [String] the Vayacondios topic
|
46
|
+
def vayacondios_topic
|
47
|
+
"listeners.#{name}"
|
48
|
+
end
|
49
|
+
|
50
|
+
# Returns a representation of this FTP syncer suitable for a
|
51
|
+
# Vayacondios stash.
|
52
|
+
#
|
53
|
+
# @return [Hash]
|
54
|
+
def to_vayacondios
|
55
|
+
{
|
56
|
+
protocol: settings[:protocol],
|
57
|
+
username: settings[:username],
|
58
|
+
password: (settings[:password] ? true : false),
|
59
|
+
host: settings[:host],
|
60
|
+
port: port,
|
61
|
+
path: settings[:path],
|
62
|
+
}
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Attaches to the hooks provided by the Wukong::Load::Syncer class
|
5
|
+
# to write data to Vayacondios.
|
6
|
+
module PrepareSyncerOverride
|
7
|
+
|
8
|
+
# Saves the syncer as a stash in Vayacondios.
|
9
|
+
def before_sync
|
10
|
+
super()
|
11
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 'prepare', self)
|
12
|
+
end
|
13
|
+
|
14
|
+
# Announces a successful sync and updates the last sync state
|
15
|
+
# and time.
|
16
|
+
def after_sync
|
17
|
+
super()
|
18
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
19
|
+
success: success?,
|
20
|
+
step: 'prepare',
|
21
|
+
counts: counts,
|
22
|
+
files: files,
|
23
|
+
}.tap { |e| e[:duration] = duration if duration })
|
24
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "prepare.last", { state: (success? ? 1 : 0), time: Time.now.utc.to_i })
|
25
|
+
end
|
26
|
+
|
27
|
+
# Announces an error during a sync and updates the last sync
|
28
|
+
# state and time.
|
29
|
+
def on_error error
|
30
|
+
super(error)
|
31
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
32
|
+
success: false,
|
33
|
+
step: 'prepare',
|
34
|
+
error: error.class,
|
35
|
+
message: error.message,
|
36
|
+
counts: counts,
|
37
|
+
files: files,
|
38
|
+
})
|
39
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "prepare.last", { state: 0, time: Time.now.utc.to_i })
|
40
|
+
end
|
41
|
+
|
42
|
+
# Returns the Vayacondios topic for this PrepareSyncer.
|
43
|
+
#
|
44
|
+
# @return [String] the Vayacondios topic
|
45
|
+
def vayacondios_topic
|
46
|
+
"listeners.#{name}"
|
47
|
+
end
|
48
|
+
|
49
|
+
# Returns a representation of this PrepareSyncer suitable for a
|
50
|
+
# Vayacondios stash.
|
51
|
+
#
|
52
|
+
# @return [Hash]
|
53
|
+
def to_vayacondios
|
54
|
+
{
|
55
|
+
split: settings[:split],
|
56
|
+
lines: settings[:lines],
|
57
|
+
bytes: settings[:bytes],
|
58
|
+
ordered: settings[:ordered],
|
59
|
+
metadata: settings[:metadata],
|
60
|
+
}
|
61
|
+
end
|
62
|
+
|
63
|
+
module HandlerOverride
|
64
|
+
|
65
|
+
# The topic for this Handler.
|
66
|
+
#
|
67
|
+
# Delegates to PrepareSyncer#vayacondios_topic.
|
68
|
+
#
|
69
|
+
# @return [String]
|
70
|
+
def vayacondios_topic
|
71
|
+
syncer.vayacondios_topic
|
72
|
+
end
|
73
|
+
|
74
|
+
# Announce the file was processed.
|
75
|
+
#
|
76
|
+
# @param [Pathname] original
|
77
|
+
def after_process original
|
78
|
+
super(original)
|
79
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
80
|
+
success: true,
|
81
|
+
step: 'prepare',
|
82
|
+
path: fragment_for(original),
|
83
|
+
size: File.size(original),
|
84
|
+
})
|
85
|
+
end
|
86
|
+
|
87
|
+
# Announce an error in processing a file.
|
88
|
+
#
|
89
|
+
# @param [Pathname] original
|
90
|
+
# @param [Error] error
|
91
|
+
def on_error original, error
|
92
|
+
super(original, error)
|
93
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
94
|
+
success: false,
|
95
|
+
step: 'prepare',
|
96
|
+
path: fragment_for(original),
|
97
|
+
error: error.class,
|
98
|
+
message: error.message
|
99
|
+
})
|
100
|
+
end
|
101
|
+
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Attaches to the hooks provided by the Wukong::Load::Syncer class
|
5
|
+
# to write data to Vayacondios.
|
6
|
+
module S3SyncerOverride
|
7
|
+
|
8
|
+
# Saves the syncer as a stash in Vayacondios.
|
9
|
+
def before_sync
|
10
|
+
super()
|
11
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 's3', self)
|
12
|
+
end
|
13
|
+
|
14
|
+
# Announces a successful sync and updates the last sync state
|
15
|
+
# and time.
|
16
|
+
def after_sync
|
17
|
+
super()
|
18
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
19
|
+
success: true,
|
20
|
+
step: 's3',
|
21
|
+
bytes: (bytes || 0),
|
22
|
+
}.tap { |e| e[:duration] = duration if duration })
|
23
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "s3.last", { state: 1, time: Time.now.utc.to_i })
|
24
|
+
end
|
25
|
+
|
26
|
+
# Announces an error during a sync and updates the last sync
|
27
|
+
# state and time.
|
28
|
+
def on_error error
|
29
|
+
super(error)
|
30
|
+
Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
|
31
|
+
success: false,
|
32
|
+
step: 's3',
|
33
|
+
error: error.class,
|
34
|
+
message: error.message,
|
35
|
+
bytes: bytes,
|
36
|
+
})
|
37
|
+
Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "s3.last", { state: 0, time: Time.now.utc.to_i })
|
38
|
+
end
|
39
|
+
|
40
|
+
# Returns the Vayacondios topic for this S3Syncer.
|
41
|
+
#
|
42
|
+
# @return [String] the Vayacondios topic
|
43
|
+
def vayacondios_topic
|
44
|
+
"listeners.#{name}"
|
45
|
+
end
|
46
|
+
|
47
|
+
# Returns a representation of this S3Syncer suitable for a
|
48
|
+
# Vayacondios stash.
|
49
|
+
#
|
50
|
+
# @return [Hash]
|
51
|
+
def to_vayacondios
|
52
|
+
{
|
53
|
+
bucket: s3_uri,
|
54
|
+
region: settings[:region],
|
55
|
+
}
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Makes `UsesFileState#file_state_dir` default to the
|
5
|
+
# `Wukong::Deploy.tmp_dir`.
|
6
|
+
module UsesFileStateOverride
|
7
|
+
|
8
|
+
# Overrides the default value of #file_state_dir with
|
9
|
+
# Wukong::Deploy.tmp_dir.
|
10
|
+
#
|
11
|
+
# @return [String]
|
12
|
+
def file_state_dir
|
13
|
+
Wukong::Deploy.tmp_dir.to_s
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Makes `UsesLockfile#lockfile_dir` default to the
|
5
|
+
# `Wukong::Deploy.tmp_dir`.
|
6
|
+
module UsesLockfileOverride
|
7
|
+
|
8
|
+
# Overrides the default value of #lockfile_dir with
|
9
|
+
# Wukong::Deploy.tmp_dir.
|
10
|
+
#
|
11
|
+
# @return [String]
|
12
|
+
def lockfile_dir
|
13
|
+
Wukong::Deploy.tmp_dir.to_s
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
File without changes
|
data/lib/wukong-deploy/repo.rb
CHANGED
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative('extensions/uses_lockfile')
|
2
|
+
require_relative('extensions/uses_file_state')
|
3
|
+
require_relative('extensions/ftp_syncer')
|
4
|
+
require_relative('extensions/prepare_syncer')
|
5
|
+
require_relative('extensions/s3_syncer')
|
6
|
+
|
7
|
+
Wukong::Load::SyncRunner.class_eval do
|
8
|
+
include Wukong::Deploy::UsesLockfileOverride
|
9
|
+
end
|
10
|
+
if defined?(Wukong::Load::SyncAllRunner)
|
11
|
+
Wukong::Load::SyncAllRunner.class_eval do
|
12
|
+
include Wukong::Deploy::UsesLockfileOverride
|
13
|
+
end
|
14
|
+
end
|
15
|
+
Wukong::Load::FTPSyncer.class_eval do
|
16
|
+
include Wukong::Deploy::FTPSyncerOverride
|
17
|
+
end
|
18
|
+
Wukong::Load::PrepareSyncer.class_eval do
|
19
|
+
include Wukong::Deploy::UsesFileStateOverride
|
20
|
+
include Wukong::Deploy::PrepareSyncerOverride
|
21
|
+
end
|
22
|
+
Wukong::Load::PrepareSyncer::Handler.class_eval do
|
23
|
+
include Wukong::Deploy::PrepareSyncerOverride::HandlerOverride
|
24
|
+
end
|
25
|
+
Wukong::Load::S3Syncer.class_eval do
|
26
|
+
include Wukong::Deploy::S3SyncerOverride
|
27
|
+
end
|
data/lib/wukong-deploy/tasks.rb
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
require 'rake'
|
2
2
|
|
3
|
+
require 'wukong/rake_helper'
|
4
|
+
|
3
5
|
require 'yard'
|
4
|
-
YARD::Rake::YardocTask.new(:doc)
|
6
|
+
YARD::Rake::YardocTask.new(:doc) do |t|
|
7
|
+
t.files = %w[app/**/*.rb Rakefile Gemfile]
|
8
|
+
t.options = ['--load', Wukong.doc_helpers_path]
|
9
|
+
end
|
5
10
|
|
6
11
|
require 'rspec/core/rake_task'
|
7
12
|
RSpec::Core::RakeTask.new(:spec)
|
data/templates/Gemfile.erb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
# Use the public [RubyGems server](
|
2
|
-
source
|
1
|
+
# Use the public [RubyGems server](https://rubygems.org/)
|
2
|
+
source 'https://rubygems.org'
|
3
3
|
|
4
4
|
# = Application
|
5
5
|
#
|
@@ -12,7 +12,7 @@ source :rubygems
|
|
12
12
|
#
|
13
13
|
# here and then remember to run
|
14
14
|
#
|
15
|
-
# $ bundle install
|
15
|
+
# $ bundle install
|
16
16
|
#
|
17
17
|
# Here's a link to the [Bundler documentation](http://gembundler.com/)
|
18
18
|
# for your convenience.
|
@@ -46,6 +46,7 @@ gem 'wonderdog', '0.1.0'
|
|
46
46
|
# Provides
|
47
47
|
#
|
48
48
|
# - wu-storm
|
49
|
+
# - wu-bolt
|
49
50
|
gem 'wukong-storm', '0.1.0'
|
50
51
|
|
51
52
|
# Lets you load JSON blobs into data stores.
|
@@ -56,13 +57,50 @@ gem 'wukong-storm', '0.1.0'
|
|
56
57
|
# - wu-source
|
57
58
|
gem 'wukong-load', '0.1.0'
|
58
59
|
|
59
|
-
#
|
60
|
+
# If you want to use wu-load with certain databases you may have to
|
61
|
+
# enable the various gems required for the database(s) you're
|
62
|
+
# interested in here.
|
63
|
+
group :load do
|
64
|
+
|
65
|
+
#
|
66
|
+
# Already required by one or more Wukong plugins:
|
67
|
+
#
|
68
|
+
# - Kafka
|
69
|
+
#
|
70
|
+
# Not required because uses Ruby standard library to connect:
|
71
|
+
#
|
72
|
+
# - ElasticSearch
|
73
|
+
#
|
74
|
+
|
75
|
+
# Enable MongoDB
|
76
|
+
#
|
77
|
+
# gem 'mongo'
|
78
|
+
# gem 'bson_ext'
|
79
|
+
|
80
|
+
# Enable MySQL
|
81
|
+
#
|
82
|
+
# gem 'mysql2'
|
83
|
+
end
|
84
|
+
|
85
|
+
# Framework for gluing Wukong plugins together into a deploy pack.
|
60
86
|
#
|
61
87
|
# Provides
|
62
88
|
#
|
63
89
|
# - wu-deploy
|
64
90
|
gem 'wukong-deploy', '0.1.0'
|
65
91
|
|
92
|
+
# Provides metadata about a deploy pack.
|
93
|
+
#
|
94
|
+
# Provides
|
95
|
+
#
|
96
|
+
# - wu-meta
|
97
|
+
#
|
98
|
+
gem 'wukong-meta', '0.1.0'
|
99
|
+
|
100
|
+
# Integrates Wukong processors with Vayacondios.
|
101
|
+
#
|
102
|
+
gem 'wukong-vayacondios', '0.1.0'
|
103
|
+
|
66
104
|
group :development do
|
67
105
|
gem 'yard'
|
68
106
|
gem 'redcarpet'
|
data/templates/README.md.erb
CHANGED
@@ -94,55 +94,84 @@ want to run `rbenv exec bundle install`.
|
|
94
94
|
|
95
95
|
### Configuration
|
96
96
|
|
97
|
-
|
98
|
-
you begin to extend it you may add functionality which benefits from
|
99
|
-
the ability to be configured.
|
97
|
+
#### Configuring the Environment
|
100
98
|
|
101
|
-
|
102
|
-
|
103
|
-
|
99
|
+
Before any of the `wu` programs can run, the Ruby process must first
|
100
|
+
boot up, require Wukong and all necessary dependencies (such as
|
101
|
+
'event-machine') and plugins (such as the deploy pack plugin
|
102
|
+
`wukong-deploy`), and then hand over control to the `wu` program.
|
104
103
|
|
105
|
-
|
104
|
+
The following Ruby files are loaded in order. Each file is
|
105
|
+
responsible for configuring some part of this runtime environment:
|
106
|
+
|
107
|
+
1. `config/environment` -- requires the rest of the files and adds any additional environmental code
|
108
|
+
2. `config/application` -- defines the load order of external libraries, Wukong plugins, and application code
|
109
|
+
3. `config/boot` -- defines how and where the Ruby process will look for code dependencies (through Bundler)
|
110
|
+
4. `config/initializers/*.rb` -- non-Wukong configuration for external libraries or application code can live here
|
111
|
+
|
112
|
+
#### Configuring the Application
|
113
|
+
|
114
|
+
The application a given deploy pack is running can be configured at
|
115
|
+
several different layers.
|
116
|
+
|
117
|
+
The simplest layer is settings passed to `wu` programs on the
|
118
|
+
command-line. These settings have the highest precedence and will
|
119
|
+
always be read.
|
120
|
+
|
121
|
+
When booting any of the `wu` tools the deploy pack will also read and
|
122
|
+
merge settings from the following configuration files, in order of
|
123
|
+
**increasing** precedence:
|
124
|
+
|
125
|
+
1. `config/settings.yml`
|
126
|
+
2. `config/settings/*.yml` if present, without any guarantee as to order
|
127
|
+
3. `config/environments/[environment].yml`
|
128
|
+
4. `config/environments/[environment]/*.yml` if present, without any guarantee as to order
|
129
|
+
5. `config/deploy.yml` if present (this file should be ignored by version control)
|
130
|
+
6. `config/environments/deploy-[environment].yml` if present (this file should be ignored by version control)
|
131
|
+
|
132
|
+
Finally, if interaction with Vayacondios is turned on, settings will
|
133
|
+
also be read from a Vayacondios stash (see the <a
|
134
|
+
href="#vayacondios">Vayacondios section</a> below).
|
135
|
+
|
136
|
+
Completely merged and resolved configuration settings are accessible
|
137
|
+
globally (once the Wukong framework has been booted) via the
|
138
|
+
`Wukong::Deploy.settings` object. Any piece of code in a model,
|
139
|
+
processor, dataflow, or elsewhere can read and write to this object.
|
140
|
+
|
141
|
+
Processors will *automatically* read settings for their fields from a
|
142
|
+
subhash within this global settings object. Given a processor like
|
106
143
|
|
107
144
|
```ruby
|
108
|
-
Wukong.
|
109
|
-
field :
|
110
|
-
def process
|
111
|
-
|
145
|
+
Wukong.processor(:tokenizer) do
|
146
|
+
field :min_length, Integer, default: 2
|
147
|
+
def process line
|
148
|
+
...
|
112
149
|
end
|
113
150
|
end
|
114
151
|
```
|
115
152
|
|
116
|
-
|
117
|
-
|
118
|
-
```
|
119
|
-
$ cat input
|
120
|
-
1
|
121
|
-
2
|
122
|
-
3
|
123
|
-
$ cat input | wu-local configurable_decorator
|
124
|
-
1.
|
125
|
-
2.
|
126
|
-
3.
|
127
|
-
$ cat input | wu-local configurable_decorator --suffix=','
|
128
|
-
1,
|
129
|
-
2,
|
130
|
-
3,
|
131
|
-
|
132
|
-
You can also set the same property in a configuration file, scoped by
|
133
|
-
the name of the processor:
|
153
|
+
you can override the value of its `min_length` field by putting
|
154
|
+
the following section into any one of the configuration files above:
|
134
155
|
|
135
156
|
```yaml
|
136
|
-
# in config/settings.yml
|
137
157
|
---
|
158
|
+
# in config/settings.yml, for example
|
138
159
|
|
139
|
-
|
140
|
-
|
160
|
+
tokenizer:
|
161
|
+
min_length: 5
|
141
162
|
```
|
142
163
|
|
143
|
-
which
|
144
|
-
|
145
|
-
|
164
|
+
which would now make the command
|
165
|
+
|
166
|
+
```
|
167
|
+
$ cat corpus.txt | wu local tokenizer
|
168
|
+
```
|
169
|
+
|
170
|
+
have the same effect as
|
171
|
+
|
172
|
+
```
|
173
|
+
$ cat corpus.txt | wu local tokenizer --min_length=5
|
174
|
+
```
|
146
175
|
|
147
176
|
## File Structure
|
148
177
|
|
@@ -206,3 +235,200 @@ the underlying documentation for Wukong and its plugins, specifically:
|
|
206
235
|
|
207
236
|
* on [Wukong](http://github.com/infochimps-labs/wukong/tree/3.0.0) so you understand the basic idea of a processor and how to glue processors together
|
208
237
|
* on [Wukong-Hadoop](http://github.com/infochimps-labs/wukong-hadoop) so you understand how to move between local and Hadoop modes for batch analytics
|
238
|
+
|
239
|
+
|
240
|
+
<a name="vayacondios"></a>
|
241
|
+
## Interacting with Vayacondios
|
242
|
+
|
243
|
+
[Vayacondios](http://github.com/infochimps-labs/vayacondios) is a
|
244
|
+
program which makes it easy for clients to announce events or read
|
245
|
+
and write settings to and from a central server.
|
246
|
+
|
247
|
+
The basic objects of Vayacondios are the **stash** and the **event**:
|
248
|
+
|
249
|
+
* a **stash** is an "object", a "configuration", or "setting" designed to be shared among many services
|
250
|
+
* an **event** is a "fact", "measurement", or "metric" announced by an arbitrary service, possibly related to some stash
|
251
|
+
|
252
|
+
Stashes and events are organized in two levels.
|
253
|
+
|
254
|
+
The top-level is the **organization**. Data from multiple
|
255
|
+
organizations is stored together but accessed separately by a running
|
256
|
+
Vayacondios server. An organization could be the name of a user,
|
257
|
+
workgroup, application, or service using Vayacondios.
|
258
|
+
|
259
|
+
The next level is the **topic**. Each topic within Vayacondios has a
|
260
|
+
single stash and can have multiple events. An "object" like a server,
|
261
|
+
a database, an application, a service, or a user maps to the concept
|
262
|
+
of "topic".
|
263
|
+
|
264
|
+
Every `wu` tool running within a deploy pack takes an additional
|
265
|
+
option `--vcd` which turns on or off interactions with Vayacondios.
|
266
|
+
This option can be specified at runtime on the command-line as well as
|
267
|
+
via a configuration file. When not running "in Vayacondios mode"
|
268
|
+
(when `--vcd` was not passed), interactions with Vayacondios will be
|
269
|
+
logged instead of transmitted and received.
|
270
|
+
|
271
|
+
### Configuring Vayacondios access
|
272
|
+
|
273
|
+
If you don't intend to interact with a Vayacondios server, you can
|
274
|
+
just set `vcd` to `false` for your whole environment and skip this
|
275
|
+
section (as is done, for example, in the `test` environment by
|
276
|
+
default).
|
277
|
+
|
278
|
+
If you intend to interact with Vayacondios then you need to also
|
279
|
+
specify the `vcd_host` and `vcd_port` options which otherwise default
|
280
|
+
to the usual Vayacondios server port running on localhost.
|
281
|
+
|
282
|
+
```yaml
|
283
|
+
---
|
284
|
+
# in config/environments/production.yml
|
285
|
+
vcd_host: 10.123.123.123
|
286
|
+
vcd_port: 9000
|
287
|
+
```
|
288
|
+
|
289
|
+
Vayacondios also requires that all events and stashes are stored under
|
290
|
+
a given organization name. The Vayacondios organization, which will
|
291
|
+
likely be shared across all environments of your application, is
|
292
|
+
usually set at the top-level:
|
293
|
+
|
294
|
+
```yaml
|
295
|
+
---
|
296
|
+
# in config/settings.yml
|
297
|
+
organization: my_company
|
298
|
+
```
|
299
|
+
|
300
|
+
### Handle out of band event data with Events
|
301
|
+
|
302
|
+
Despite being designed to be powerful and scalable, Vayacondios is not
|
303
|
+
the appropriate store for high-volume, high-throughput,
|
304
|
+
mission-critical data which must be persisted over the long-term.
|
305
|
+
Instead it should be used for "out of band" data, which is typically
|
306
|
+
much smaller in volume and throughput than the main body of a
|
307
|
+
dataflow. Examples of such out of band events include:
|
308
|
+
|
309
|
+
* signaling some intermittent or runtime error
|
310
|
+
* warning that some event was bad or suspicious
|
311
|
+
* logging an error
|
312
|
+
* registering some periodic metric
|
313
|
+
* signaling a change in state
|
314
|
+
|
315
|
+
Announcements can be made from anywhere within the Wukong framework by
|
316
|
+
accessing the `Wukong::Deploy.vayacondios_client` object but the most
|
317
|
+
common approach is to announce events within a processor or within a
|
318
|
+
dataflow.
|
319
|
+
|
320
|
+
#### Announcing from a processor
|
321
|
+
|
322
|
+
The `Wukong::Processor#announce` method can be used to directly send
|
323
|
+
an event to Vayacondios on a given topic.
|
324
|
+
|
325
|
+
```ruby
|
326
|
+
Wukong.processor(:parser) do
|
327
|
+
def process line
|
328
|
+
yield parse!(line)
|
329
|
+
rescue ParseError => e
|
330
|
+
announce "parser.errors", line: line
|
331
|
+
end
|
332
|
+
end
|
333
|
+
```
|
334
|
+
|
335
|
+
It's important when setting up an announcement like this that you
|
336
|
+
consider how often this piece of code will actually send events to
|
337
|
+
Vayacondios. If a `ParseError` is triggered once in every 10,000
|
338
|
+
lines, this may be perfectly fine to be running in production. If 1
|
339
|
+
in 10 lines causes a similar error, this may not be the right
|
340
|
+
approach.
|
341
|
+
|
342
|
+
#### Announcing from a dataflow
|
343
|
+
|
344
|
+
The `announce` processor can be used to announce all incoming
|
345
|
+
events to Vayacondios. Here's an example flow which makes use of it:
|
346
|
+
|
347
|
+
```ruby
|
348
|
+
Wukong.dataflow(:parse_source) do
|
349
|
+
parser |
|
350
|
+
[
|
351
|
+
select(&:valid?) | ... | to_json,
|
352
|
+
select(&:invalid?) | announce(topic: "invalid_records")
|
353
|
+
]
|
354
|
+
end
|
355
|
+
```
|
356
|
+
|
357
|
+
Just as in the above example with a processor, it's important that the
|
358
|
+
flow through the announce processor is not incredibly high-volume.
|
359
|
+
|
360
|
+
The `announce` processor is terminal; it yields no output records.
|
361
|
+
|
362
|
+
### Allow dynamic configuration with Stashes
|
363
|
+
|
364
|
+
The deploy pack inside a backend system like Hadoop or Storm can fetch
|
365
|
+
stashes from Vayacondios during runtime. Other systems external to
|
366
|
+
the deploy pack can simultaneously be writing data into these same
|
367
|
+
stashes in Vayacondios, allowing for a lightweight, two-way
|
368
|
+
communication stream between the deploy pack and arbitrary external
|
369
|
+
resources, mediated by a key-value store (the Vayacondios stash).
|
370
|
+
|
371
|
+
Stashes can be read and written from anywhere within the Wukong
|
372
|
+
framework by accessing the `Wukong::Deploy.vayacondios_client` object
|
373
|
+
but there are two special places where encapsulated, remote settings
|
374
|
+
are very useful.
|
375
|
+
|
376
|
+
#### Dynamic settings for the deploy pack itself
|
377
|
+
|
378
|
+
Each deploy pack, as an application, can fetch a stash of settings
|
379
|
+
from Vayacondios and use this at bootup time in the same way it uses a
|
380
|
+
configuration file on disk.  All that is required is a Vayacondios
|
381
|
+
stash topic name. This is furnished by providing to the deploy pack
|
382
|
+
an `application` name in a configuration file, usually the top-level
|
383
|
+
one:
|
384
|
+
|
385
|
+
```yaml
|
386
|
+
---
|
387
|
+
# in config/settings.yml, for example
|
388
|
+
|
389
|
+
application: my_app
|
390
|
+
```
|
391
|
+
|
392
|
+
When any `wu` tool is launched within the deploy pack with the `--vcd`
|
393
|
+
option (possibly set at an environment-wide level via a configuration
|
394
|
+
file) then remote settings from Vayacondios for the `application` will
|
395
|
+
be pulled at boot-time and merged into the local settings from
|
396
|
+
configuration files and the command-line.
|
397
|
+
|
398
|
+
#### Dynamic settings for processors
|
399
|
+
|
400
|
+
The processor `tokenizer` in the deploy pack with application name
|
401
|
+
`my_app` defaults to using the stash with topic
|
402
|
+
`processors.my_app-tokenizer` in Vayacondios to store its settings
|
403
|
+
(this can be changed by overriding the `Wukong::Processor#vcd_topic`
|
404
|
+
method).
|
405
|
+
|
406
|
+
These settings, if they exist, can be retrieved and merged into the
|
407
|
+
processor's current fields at anytime using the
|
408
|
+
`Wukong::Processor#update_settings` method.  A common use case is to want to
|
409
|
+
update a processor's fields every 30 seconds, or similar. This is
|
410
|
+
most easily accomplished via the
|
411
|
+
`Wukong::Processor#update_settings_every` method.  Here's an example:
|
412
|
+
|
413
|
+
```ruby
|
414
|
+
Wukong.processor(:tagger) do
|
415
|
+
field :tags, Array, doc: "List of tags to check", default: []
|
416
|
+
|
417
|
+
def setup
|
418
|
+
update_settings_every(30)
|
419
|
+
end
|
420
|
+
|
421
|
+
def process record
|
422
|
+
tags.each do |tag|
|
423
|
+
...
|
424
|
+
end
|
425
|
+
end
|
426
|
+
end
|
427
|
+
```
|
428
|
+
|
429
|
+
The `tags` field of this processor will be updated every 30 seconds
|
430
|
+
with the latest values from Vayacondios.
|
431
|
+
|
432
|
+
The `Wukong::Processor#save_settings` and
|
433
|
+
`Wukong::Processor#save_settings_every` methods can be used to
|
434
|
+
save settings from a processor **to** Vayacondios.
|
@@ -1,17 +1,6 @@
|
|
1
1
|
# = config/boot.rb
|
2
2
|
#
|
3
3
|
# Loads the Ruby language framework and activates Bundler.
|
4
|
-
#
|
5
|
-
# We can use a system-wide bundle installation _or_ a standalone
|
6
|
-
# bundle installation. If a standalone installation exists (at
|
7
|
-
# /bundle/bundler/setup.rb), we will detect and use it. Otherwise, we
|
8
|
-
# fall back on the system-wide bundle installation.
|
9
|
-
#
|
10
|
-
# Make sure you install the bundle locally with the following command:
|
11
|
-
#
|
12
|
-
# @example Install a standalone bundle of gems
|
13
|
-
#
|
14
|
-
# $ bundle install --standalone
|
15
4
|
|
16
5
|
local_setup_path = File.expand_path("../../bundle/bundler/setup.rb", __FILE__)
|
17
6
|
if File.exist?(local_setup_path)
|
@@ -0,0 +1,5 @@
|
|
1
|
+
# EventMachine hard-codes a very inconvenient maximum length for a
|
2
|
+
# line -- 16K. This is too small for many use cases and is enlarged
|
3
|
+
# here to 16M.
|
4
|
+
EventMachine::Protocols::LineAndTextProtocol.send(:remove_const, :MaxLineLength)
|
5
|
+
EventMachine::Protocols::LineAndTextProtocol.send(:const_set, :MaxLineLength, 16*1024*1024)
|
data/templates/gitignore
CHANGED
data/wukong-deploy.gemspec
CHANGED
@@ -31,11 +31,11 @@ Gem::Specification.new do |gem|
|
|
31
31
|
EOF
|
32
32
|
|
33
33
|
gem.files = `git ls-files`.split("\n")
|
34
|
-
gem.executables = ['wu-deploy'
|
34
|
+
gem.executables = ['wu-deploy']
|
35
35
|
gem.test_files = gem.files.grep(/^spec/)
|
36
36
|
gem.require_paths = ['lib']
|
37
37
|
|
38
|
-
gem.add_dependency('wukong', '
|
38
|
+
gem.add_dependency('wukong', '4.0.0')
|
39
39
|
gem.add_dependency('rake', '~> 0.9')
|
40
40
|
gem.add_dependency('erubis')
|
41
41
|
gem.add_dependency('diffy')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wukong-deploy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date:
|
14
|
+
date: 2014-03-19 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: wukong
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - '='
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: 4.0.0
|
24
24
|
type: :runtime
|
25
25
|
prerelease: false
|
26
26
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -28,7 +28,7 @@ dependencies:
|
|
28
28
|
requirements:
|
29
29
|
- - '='
|
30
30
|
- !ruby/object:Gem::Version
|
31
|
-
version:
|
31
|
+
version: 4.0.0
|
32
32
|
- !ruby/object:Gem::Dependency
|
33
33
|
name: rake
|
34
34
|
requirement: !ruby/object:Gem::Requirement
|
@@ -91,7 +91,6 @@ description: ! " The Infochimps Platform is an end-to-end, managed solution for
|
|
91
91
|
email: coders@infochimps.com
|
92
92
|
executables:
|
93
93
|
- wu-deploy
|
94
|
-
- wu
|
95
94
|
extensions: []
|
96
95
|
extra_rdoc_files: []
|
97
96
|
files:
|
@@ -102,17 +101,25 @@ files:
|
|
102
101
|
- LICENSE.md
|
103
102
|
- README.md
|
104
103
|
- Rakefile
|
105
|
-
- bin/wu
|
106
104
|
- bin/wu-deploy
|
107
105
|
- examples/.gitkeep
|
108
106
|
- lib/wukong-deploy.rb
|
109
107
|
- lib/wukong-deploy/console.rb
|
110
108
|
- lib/wukong-deploy/deploy_pack.rb
|
111
109
|
- lib/wukong-deploy/deploy_runner.rb
|
112
|
-
- lib/wukong-deploy/
|
110
|
+
- lib/wukong-deploy/dump_extensions.rb
|
111
|
+
- lib/wukong-deploy/extensions/ftp_syncer.rb
|
113
112
|
- lib/wukong-deploy/extensions/hadoop_invocation_override.rb
|
113
|
+
- lib/wukong-deploy/extensions/prepare_syncer.rb
|
114
|
+
- lib/wukong-deploy/extensions/s3_syncer.rb
|
115
|
+
- lib/wukong-deploy/extensions/storm_invocation_override.rb
|
116
|
+
- lib/wukong-deploy/extensions/uses_file_state.rb
|
117
|
+
- lib/wukong-deploy/extensions/uses_lockfile.rb
|
118
|
+
- lib/wukong-deploy/hadoop_extensions.rb
|
114
119
|
- lib/wukong-deploy/rake_runner.rb
|
115
120
|
- lib/wukong-deploy/repo.rb
|
121
|
+
- lib/wukong-deploy/storm_extensions.rb
|
122
|
+
- lib/wukong-deploy/sync_extensions.rb
|
116
123
|
- lib/wukong-deploy/tasks.rb
|
117
124
|
- lib/wukong-deploy/templater.rb
|
118
125
|
- lib/wukong-deploy/templater/conflict_resolution.rb
|
@@ -131,6 +138,7 @@ files:
|
|
131
138
|
- templates/config/environments/development.yml.erb
|
132
139
|
- templates/config/environments/production.yml.erb
|
133
140
|
- templates/config/environments/test.yml.erb
|
141
|
+
- templates/config/initializers/event_machine.rb.erb
|
134
142
|
- templates/config/settings.yml.erb
|
135
143
|
- templates/gitignore
|
136
144
|
- templates/spec/spec_helper.rb.erb
|
@@ -150,7 +158,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
150
158
|
version: '0'
|
151
159
|
segments:
|
152
160
|
- 0
|
153
|
-
hash:
|
161
|
+
hash: -3128770076158943322
|
154
162
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
155
163
|
none: false
|
156
164
|
requirements:
|
@@ -159,10 +167,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
159
167
|
version: '0'
|
160
168
|
segments:
|
161
169
|
- 0
|
162
|
-
hash:
|
170
|
+
hash: -3128770076158943322
|
163
171
|
requirements: []
|
164
172
|
rubyforge_project:
|
165
|
-
rubygems_version: 1.8.
|
173
|
+
rubygems_version: 1.8.23
|
166
174
|
signing_key:
|
167
175
|
specification_version: 3
|
168
176
|
summary: Defines the deploy pack framework used by the Infochimps Platform
|
data/bin/wu
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
require 'shellwords'
|
3
|
-
now=Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
4
|
-
if ARGV.empty?
|
5
|
-
$stderr.puts "ERROR #{now} [wu ] -- Must provide a Wukong command to run. Try the --help option."
|
6
|
-
else
|
7
|
-
if ARGV.size == 1 && ARGV.first == '--help'
|
8
|
-
$stderr.puts <<EOF
|
9
|
-
usage: wu COMMAND [OPTIONS] [ARG] ...
|
10
|
-
|
11
|
-
wu is a wrapper for easy use of Wukong's command-line tools. It takes
|
12
|
-
your arguments, constructs the name of the proper wu-tool to call, and
|
13
|
-
prepends a call to bundle exec.
|
14
|
-
|
15
|
-
$ wu local ...
|
16
|
-
|
17
|
-
is equivalent to
|
18
|
-
|
19
|
-
$ bundle exec wu-local ...
|
20
|
-
|
21
|
-
You can run any of the wu-tools like
|
22
|
-
|
23
|
-
wu-local
|
24
|
-
wu-hadoop
|
25
|
-
wu-deploy
|
26
|
-
|
27
|
-
EOF
|
28
|
-
else
|
29
|
-
if ARGV.first =~ /^-/
|
30
|
-
$stderr.puts "ERROR ${now} [wu ] -- First argument must be the name of a wu tool to run, got <${1}>"
|
31
|
-
else
|
32
|
-
Kernel.exec "bundle exec wu-#{Shellwords.join(ARGV)}"
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|