wukong-deploy 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -57,3 +57,4 @@ away
57
57
  .rbx
58
58
  Gemfile.lock
59
59
  Backup*of*.numbers
60
+ *.gem
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source :rubygems
1
+ source 'https://rubygems.org'
2
2
 
3
3
  gemspec
4
4
 
@@ -26,8 +26,15 @@ module Wukong
26
26
  settings.define(:skip, :description => "Skip existing files", :type => :boolean, :default => false)
27
27
  settings.define(:force, :description => "Overwrite existing files", :type => :boolean, :default => false)
28
28
  when 'wu-hadoop'
29
- require_relative('wukong-deploy/extensions')
29
+ require_relative('wukong-deploy/hadoop_extensions')
30
30
  settings[:command_prefix] = 'bundle exec'
31
+ when 'wu-storm'
32
+ require_relative('wukong-deploy/storm_extensions')
33
+ settings[:command_prefix] = 'bundle exec'
34
+ when 'wu-dump'
35
+ require_relative('wukong-deploy/dump_extensions')
36
+ when 'wu-sync', 'wu-sync-all'
37
+ require_relative('wukong-deploy/sync_extensions')
31
38
  end
32
39
  end
33
40
 
@@ -1,5 +1,10 @@
1
1
  module Wukong
2
2
  module Deploy
3
+
4
+ # Return the name of this deploy pack.
5
+ def self.name
6
+ settings[:application]
7
+ end
3
8
 
4
9
  # Return the root directory of this deploy pack.
5
10
  #
@@ -58,11 +63,11 @@ module Wukong
58
63
  @root
59
64
  end
60
65
 
61
- # The directory this deploy pack keeps local, sample data in.
66
+ # The directory this deploy pack puts all its application code in.
62
67
  #
63
68
  # @return [Pathname]
64
- def self.data_dir
65
- root.join('data')
69
+ def self.app_dir
70
+ root.join('app')
66
71
  end
67
72
 
68
73
  # The directory this deploy pack uses for configuration files.
@@ -71,12 +76,19 @@ module Wukong
71
76
  def self.config_dir
72
77
  root.join('config')
73
78
  end
79
+
80
+ # The directory this deploy pack keeps local, sample data in.
81
+ #
82
+ # @return [Pathname]
83
+ def self.data_dir
84
+ root.join('data')
85
+ end
74
86
 
75
- # The directory this deploy pack uses for temporary files.
87
+ # The directory this deploy pack uses for lib files.
76
88
  #
77
89
  # @return [Pathname]
78
- def self.tmp_dir
79
- root.join('tmp')
90
+ def self.lib_dir
91
+ root.join('lib')
80
92
  end
81
93
 
82
94
  # The directory this deploy pack uses for logs.
@@ -86,11 +98,33 @@ module Wukong
86
98
  root.join('log')
87
99
  end
88
100
 
89
- # The diretory this deploy pack puts all its application code in.
101
+ # The directory this deploy pack puts all its scripts in.
90
102
  #
91
103
  # @return [Pathname]
92
- def self.app_dir
93
- root.join('app')
104
+ def self.script_dir
105
+ root.join('script')
106
+ end
107
+
108
+ # The directory this deploy pack puts all its Ruby unit tests in.
109
+ #
110
+ # @return [Pathname]
111
+ def self.spec_dir
112
+ root.join('spec')
113
+ end
114
+
115
+ # The directory this deploy pack puts all its non-Ruby source
116
+ # code in.
117
+ #
118
+ # @return [Pathname]
119
+ def self.src_dir
120
+ root.join('src')
121
+ end
122
+
123
+ # The directory this deploy pack uses for temporary files.
124
+ #
125
+ # @return [Pathname]
126
+ def self.tmp_dir
127
+ root.join('tmp')
94
128
  end
95
129
 
96
130
  private
@@ -148,9 +182,16 @@ module Wukong
148
182
  end
149
183
 
150
184
  # Read remote settings.
151
- #
152
- # FIXME -- not implemented yet.
153
185
  def self.read_remote_settings
186
+ return unless settings[:vcd]
187
+ if name.nil? || name.empty?
188
+ Wukong::Log.warn("Define the :application settings to give this deploy pack a name and be be able to read remote settings.")
189
+ return
190
+ end
191
+ topic = "deploy_packs.#{name}"
192
+ remote_settings = vayacondios_client.get(topic)
193
+ settings.merge(remote_settings) if remote_settings.success?
154
194
  end
195
+
155
196
  end
156
197
  end
@@ -0,0 +1,6 @@
1
+ require_relative('extensions/uses_lockfile')
2
+ require_relative('extensions/uses_file_state')
3
+ Wukong::Load::DirectoryDumper.class_eval do
4
+ include Wukong::Deploy::UsesLockfileOverride
5
+ include Wukong::Deploy::UsesFileStateOverride
6
+ end
@@ -0,0 +1,67 @@
1
+ module Wukong
2
+ module Deploy
3
+
4
+ # Attaches to the hooks provided by the Wukong::Load::Syncer class
5
+ # to write data to Vayacondios.
6
+ module FTPSyncerOverride
7
+
8
+ # Saves the syncer as a stash in Vayacondios.
9
+ def before_sync
10
+ super()
11
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 'ftp', self)
12
+ end
13
+
14
+ # Announces a successful sync and updates the last sync state
15
+ # and time.
16
+ def after_sync
17
+ super()
18
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
19
+ success: true,
20
+ step: 'ftp',
21
+ files: self.files,
22
+ }.tap { |e| e[:duration] = duration if duration })
23
+ now = Time.now.utc.to_i
24
+ last_ftp_sync_summary = { state: 1, time: now}
25
+ last_ftp_sync_summary[:data] = now unless self.files.empty?
26
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "ftp.last", last_ftp_sync_summary)
27
+ end
28
+
29
+ # Announces an error during a sync and updates the last sync
30
+ # state and time.
31
+ def on_error error
32
+ super(error)
33
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
34
+ success: false,
35
+ step: 'ftp',
36
+ error: error.class,
37
+ message: error.message,
38
+ files: self.files,
39
+ })
40
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "ftp.last", { state: 0, time: Time.now.utc.to_i })
41
+ end
42
+
43
+ # Returns the Vayacondios topic for this FTP syncer.
44
+ #
45
+ # @return [String] the Vayacondios topic
46
+ def vayacondios_topic
47
+ "listeners.#{name}"
48
+ end
49
+
50
+ # Returns a representation of this FTP syncer suitable for a
51
+ # Vayacondios stash.
52
+ #
53
+ # @return [Hash]
54
+ def to_vayacondios
55
+ {
56
+ protocol: settings[:protocol],
57
+ username: settings[:username],
58
+ password: (settings[:password] ? true : false),
59
+ host: settings[:host],
60
+ port: port,
61
+ path: settings[:path],
62
+ }
63
+ end
64
+
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,105 @@
1
+ module Wukong
2
+ module Deploy
3
+
4
+ # Attaches to the hooks provided by the Wukong::Load::Syncer class
5
+ # to write data to Vayacondios.
6
+ module PrepareSyncerOverride
7
+
8
+ # Saves the syncer as a stash in Vayacondios.
9
+ def before_sync
10
+ super()
11
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 'prepare', self)
12
+ end
13
+
14
+ # Announces a successful sync and updates the last sync state
15
+ # and time.
16
+ def after_sync
17
+ super()
18
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
19
+ success: success?,
20
+ step: 'prepare',
21
+ counts: counts,
22
+ files: files,
23
+ }.tap { |e| e[:duration] = duration if duration })
24
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "prepare.last", { state: (success? ? 1 : 0), time: Time.now.utc.to_i })
25
+ end
26
+
27
+ # Announces an error during a sync and updates the last sync
28
+ # state and time.
29
+ def on_error error
30
+ super(error)
31
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
32
+ success: false,
33
+ step: 'prepare',
34
+ error: error.class,
35
+ message: error.message,
36
+ counts: counts,
37
+ files: files,
38
+ })
39
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "prepare.last", { state: 0, time: Time.now.utc.to_i })
40
+ end
41
+
42
+ # Returns the Vayacondios topic for this PrepareSyncer.
43
+ #
44
+ # @return [String] the Vayacondios topic
45
+ def vayacondios_topic
46
+ "listeners.#{name}"
47
+ end
48
+
49
+ # Returns a representation of this PrepareSyncer suitable for a
50
+ # Vayacondios stash.
51
+ #
52
+ # @return [Hash]
53
+ def to_vayacondios
54
+ {
55
+ split: settings[:split],
56
+ lines: settings[:lines],
57
+ bytes: settings[:bytes],
58
+ ordered: settings[:ordered],
59
+ metadata: settings[:metadata],
60
+ }
61
+ end
62
+
63
+ module HandlerOverride
64
+
65
+ # The topic for this Handler.
66
+ #
67
+ # Delegates to PrepareSyncer#vayacondios_topic.
68
+ #
69
+ # @return [String]
70
+ def vayacondios_topic
71
+ syncer.vayacondios_topic
72
+ end
73
+
74
+ # Announce the file was processed.
75
+ #
76
+ # @param [Pathname] original
77
+ def after_process original
78
+ super(original)
79
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
80
+ success: true,
81
+ step: 'prepare',
82
+ path: fragment_for(original),
83
+ size: File.size(original),
84
+ })
85
+ end
86
+
87
+ # Announce an error in processing a file.
88
+ #
89
+ # @param [Pathname] original
90
+ # @param [Error] error
91
+ def on_error original, error
92
+ super(original, error)
93
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
94
+ success: false,
95
+ step: 'prepare',
96
+ path: fragment_for(original),
97
+ error: error.class,
98
+ message: error.message
99
+ })
100
+ end
101
+
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,59 @@
1
+ module Wukong
2
+ module Deploy
3
+
4
+ # Attaches to the hooks provided by the Wukong::Load::Syncer class
5
+ # to write data to Vayacondios.
6
+ module S3SyncerOverride
7
+
8
+ # Saves the syncer as a stash in Vayacondios.
9
+ def before_sync
10
+ super()
11
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 's3', self)
12
+ end
13
+
14
+ # Announces a successful sync and updates the last sync state
15
+ # and time.
16
+ def after_sync
17
+ super()
18
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
19
+ success: true,
20
+ step: 's3',
21
+ bytes: (bytes || 0),
22
+ }.tap { |e| e[:duration] = duration if duration })
23
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "s3.last", { state: 1, time: Time.now.utc.to_i })
24
+ end
25
+
26
+ # Announces an error during a sync and updates the last sync
27
+ # state and time.
28
+ def on_error error
29
+ super(error)
30
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
31
+ success: false,
32
+ step: 's3',
33
+ error: error.class,
34
+ message: error.message,
35
+ bytes: bytes,
36
+ })
37
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "s3.last", { state: 0, time: Time.now.utc.to_i })
38
+ end
39
+
40
+ # Returns the Vayacondios topic for this S3Syncer.
41
+ #
42
+ # @return [String] the Vayacondios topic
43
+ def vayacondios_topic
44
+ "listeners.#{name}"
45
+ end
46
+
47
+ # Returns a representation of this S3Syncer suitable for a
48
+ # Vayacondios stash.
49
+ #
50
+ # @return [Hash]
51
+ def to_vayacondios
52
+ {
53
+ bucket: s3_uri,
54
+ region: settings[:region],
55
+ }
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,13 @@
1
+ module Wukong
2
+ module Deploy
3
+ module StormInvocationOverride
4
+ def params_to_pass
5
+ Deploy.pre_deploy_settings
6
+ end
7
+ end
8
+ end
9
+ end
10
+
11
+
12
+
13
+
@@ -0,0 +1,17 @@
1
+ module Wukong
2
+ module Deploy
3
+
4
+ # Makes `UsesFileState#file_state_dir` default to the
5
+ # `Wukong::Deploy.tmp_dir`.
6
+ module UsesFileStateOverride
7
+
8
+ # Overrides the default value of #file_state_dir with
9
+ # Wukong::Deploy.tmp_dir.
10
+ #
11
+ # @return [String]
12
+ def file_state_dir
13
+ Wukong::Deploy.tmp_dir.to_s
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,17 @@
1
+ module Wukong
2
+ module Deploy
3
+
4
+ # Makes `UsesLockfile#lockfile_dir` default to the
5
+ # `Wukong::Deploy.tmp_dir`.
6
+ module UsesLockfileOverride
7
+
8
+ # Overrides the default value of #lockfile_dir with
9
+ # Wukong::Deploy.tmp_dir.
10
+ #
11
+ # @return [String]
12
+ def lockfile_dir
13
+ Wukong::Deploy.tmp_dir.to_s
14
+ end
15
+ end
16
+ end
17
+ end
@@ -68,6 +68,8 @@ module Wukong
68
68
  config/environments/test.yml
69
69
  config/environments/production.yml
70
70
 
71
+ config/initializers/event_machine.rb
72
+
71
73
  Gemfile
72
74
  Rakefile
73
75
  README.md
@@ -0,0 +1,2 @@
1
+ require_relative('extensions/storm_invocation_override')
2
+ Wukong::Storm::StormRunner.class_eval { include Wukong::Deploy::StormInvocationOverride }
@@ -0,0 +1,27 @@
1
+ require_relative('extensions/uses_lockfile')
2
+ require_relative('extensions/uses_file_state')
3
+ require_relative('extensions/ftp_syncer')
4
+ require_relative('extensions/prepare_syncer')
5
+ require_relative('extensions/s3_syncer')
6
+
7
+ Wukong::Load::SyncRunner.class_eval do
8
+ include Wukong::Deploy::UsesLockfileOverride
9
+ end
10
+ if defined?(Wukong::Load::SyncAllRunner)
11
+ Wukong::Load::SyncAllRunner.class_eval do
12
+ include Wukong::Deploy::UsesLockfileOverride
13
+ end
14
+ end
15
+ Wukong::Load::FTPSyncer.class_eval do
16
+ include Wukong::Deploy::FTPSyncerOverride
17
+ end
18
+ Wukong::Load::PrepareSyncer.class_eval do
19
+ include Wukong::Deploy::UsesFileStateOverride
20
+ include Wukong::Deploy::PrepareSyncerOverride
21
+ end
22
+ Wukong::Load::PrepareSyncer::Handler.class_eval do
23
+ include Wukong::Deploy::PrepareSyncerOverride::HandlerOverride
24
+ end
25
+ Wukong::Load::S3Syncer.class_eval do
26
+ include Wukong::Deploy::S3SyncerOverride
27
+ end
@@ -1,7 +1,12 @@
1
1
  require 'rake'
2
2
 
3
+ require 'wukong/rake_helper'
4
+
3
5
  require 'yard'
4
- YARD::Rake::YardocTask.new(:doc)
6
+ YARD::Rake::YardocTask.new(:doc) do |t|
7
+ t.files = %w[app/**/*.rb Rakefile Gemfile]
8
+ t.options = ['--load', Wukong.doc_helpers_path]
9
+ end
5
10
 
6
11
  require 'rspec/core/rake_task'
7
12
  RSpec::Core::RakeTask.new(:spec)
@@ -1,5 +1,5 @@
1
1
  module Wukong
2
2
  module Deploy
3
- VERSION = '0.1.1'
3
+ VERSION = '0.2.0'
4
4
  end
5
5
  end
@@ -1,5 +1,5 @@
1
- # Use the public [RubyGems server](http://rubygems.org/)
2
- source :rubygems
1
+ # Use the public [RubyGems server](https://rubygems.org/)
2
+ source 'https://rubygems.org'
3
3
 
4
4
  # = Application
5
5
  #
@@ -12,7 +12,7 @@ source :rubygems
12
12
  #
13
13
  # here and then remember to run
14
14
  #
15
- # $ bundle install --standalone
15
+ # $ bundle install
16
16
  #
17
17
  # Here's a link to the [Bundler documentation](http://gembundler.com/)
18
18
  # for your convenience.
@@ -46,6 +46,7 @@ gem 'wonderdog', '0.1.0'
46
46
  # Provides
47
47
  #
48
48
  # - wu-storm
49
+ # - wu-bolt
49
50
  gem 'wukong-storm', '0.1.0'
50
51
 
51
52
  # Lets you load JSON blobs into data stores.
@@ -56,13 +57,50 @@ gem 'wukong-storm', '0.1.0'
56
57
  # - wu-source
57
58
  gem 'wukong-load', '0.1.0'
58
59
 
59
- # Framework for gluing code together.
60
+ # If you want to use wu-load with certain databases you may have to
61
+ # enable the various gems required for the database(s) you're
62
+ # interested in here.
63
+ group :load do
64
+
65
+ #
66
+ # Already required by one or more Wukong plugins:
67
+ #
68
+ # - Kafka
69
+ #
70
+ # Not required because uses Ruby standard library to connect:
71
+ #
72
+ # - ElasticSearch
73
+ #
74
+
75
+ # Enable MongoDB
76
+ #
77
+ # gem 'mongo'
78
+ # gem 'bson_ext'
79
+
80
+ # Enable MySQL
81
+ #
82
+ # gem 'mysql2'
83
+ end
84
+
85
+ # Framework for gluing Wukong plugins together into a deploy pack.
60
86
  #
61
87
  # Provides
62
88
  #
63
89
  # - wu-deploy
64
90
  gem 'wukong-deploy', '0.1.0'
65
91
 
92
+ # Provides metadata about a deploy pack.
93
+ #
94
+ # Provides
95
+ #
96
+ # - wu-meta
97
+ #
98
+ gem 'wukong-meta', '0.1.0'
99
+
100
+ # Integrates Wukong processors with Vayacondios.
101
+ #
102
+ gem 'wukong-vayacondios', '0.1.0'
103
+
66
104
  group :development do
67
105
  gem 'yard'
68
106
  gem 'redcarpet'
@@ -94,55 +94,84 @@ want to run `rbenv exec bundle install`.
94
94
 
95
95
  ### Configuration
96
96
 
97
- Your deploy pack doesn't need any configuration out of the box. As
98
- you begin to extend it you may add functionality which benefits from
99
- the ability to be configured.
97
+ #### Configuring the Environment
100
98
 
101
- Put any configuration you want shared across all environments into the
102
- file `config/settings.yml`. Override this with environment-specific
103
- configuration in the appropriate file within `config/environments`.
99
+ Before any of the `wu` programs can run, the Ruby process must first
100
+ boot up, require Wukong and all necessary dependencies (such as
101
+ 'event-machine') and plugins (such as the deploy pack plugin
102
+ `wukong-deploy`), and then hand over control to the `wu` program.
104
103
 
105
- As an example, you may write a processor like this:
104
+ The following Ruby files are loaded in order. Each file is
105
+ responsible for configuring some part of this runtime environment:
106
+
107
+ 1. `config/environment` -- requires the rest of the files and adds any additional environmental code
108
+ 2. `config/application` -- defines the load order of external libraries, Wukong plugins, and application code
109
+ 3. `config/boot` -- defines how and where the Ruby process will look for code dependencies (through Bundler)
110
+ 4. `config/initializers/*.rb` -- non-Wukong configuration for external libraries or application code can live here
111
+
112
+ #### Configuring the Application
113
+
114
+ The application a given deploy pack is running can be configured at
115
+ several different layers.
116
+
117
+ The simplest layer is settings passed to `wu` programs on the
118
+ command-line. These settings have the highest precedence and will
119
+ always be read.
120
+
121
+ When booting any of the `wu` tools the deploy pack will also read and
122
+ merge settings from the following configuration files, in order of
123
+ **increasing** precedence:
124
+
125
+ 1. `config/settings.yml`
126
+ 2. `config/settings/*.yml` if present, without any guarantee as to order
127
+ 3. `config/environments/[environment].yml`
128
+ 4. `config/environments/[environment]/*.yml` if present, without any guarantee as to order
129
+ 5. `config/deploy.yml` if present (this file should be ignored by version control)
130
+ 6. `config/environments/deploy-[environment].yml` if present (this file should be ignored by version control)
131
+
132
+ Finally, if interaction with Vayacondios is turned on, settings will
133
+ also be read from a Vayacondios stash (see the <a
134
+ href="#vayacondios">Vayacondios section</a> below).
135
+
136
+ Completely merged and resolved configuration settings are accessible
137
+ globally (once the Wukong framework has been booted) via the
138
+ `Wukong::Deploy.settings` object. Any piece of code in a model,
139
+ processor, dataflow, or elsewhere can read and write to this object.
140
+
141
+ Processors will *automatically* read settings for their fields from a
142
+ subhash within this global settings object. Given a processor like
106
143
 
107
144
  ```ruby
108
- Wukong.procesor(:configurable_decorator) do
109
- field :suffix, String, :default => '.'
110
- def process record
111
- yield [record, suffix].join
145
+ Wukong.processor(:tokenizer) do
146
+ field :min_length, Integer, default: 2
147
+ def process line
148
+ ...
112
149
  end
113
150
  end
114
151
  ```
115
152
 
116
- This processor's `suffix` property can be set on the command-line:
117
-
118
- ```
119
- $ cat input
120
- 1
121
- 2
122
- 3
123
- $ cat input | wu-local configurable_decorator
124
- 1.
125
- 2.
126
- 3.
127
- $ cat input | wu-local configurable_decorator --suffix=','
128
- 1,
129
- 2,
130
- 3,
131
-
132
- You can also set the same property in a configuration file, scoped by
133
- the name of the processor:
153
+ you can override the value of its `min_length` field by putting
154
+ the following section into any one of the configuration files above:
134
155
 
135
156
  ```yaml
136
- # in config/settings.yml
137
157
  ---
158
+ # in config/settings.yml, for example
138
159
 
139
- configurable_decorator:
140
- suffix: ,
160
+ tokenizer:
161
+ min_length: 5
141
162
  ```
142
163
 
143
- which lets you the `--suffix` flag on the command-line while still
144
- overriding the default setting. You can also put such settings in
145
- environment specific files within `config/environments`.
164
+ which would now make the command
165
+
166
+ ```
167
+ $ cat corpus.txt | wu local tokenizer
168
+ ```
169
+
170
+ have the same effect as
171
+
172
+ ```
173
+ $ cat corpus.txt | wu local tokenizer --min_length=5
174
+ ```
146
175
 
147
176
  ## File Structure
148
177
 
@@ -206,3 +235,200 @@ the underlying documentation for Wukong and its plugins, specifically:
206
235
 
207
236
  * on [Wukong](http://github.com/infochimps-labs/wukong/tree/3.0.0) so you understand the basic idea of a processor and how to glue processors together
208
237
  * on [Wukong-Hadoop](http://github.com/infochimps-labs/wukong-hadoop) so you understand how to move between local and Hadoop modes for batch analytics
238
+
239
+
240
+ <a name="vayacondios"></a>
241
+ ## Interacting with Vayacondios
242
+
243
+ [Vayacondios](http://github.com/infochimps-labs/vayacondios) is a
244
+ program which makes it easy for clients to announce events or read
245
+ and write settings to and from a central server.
246
+
247
+ The basic objects of Vayacondios are the **stash** and the **event**:
248
+
249
+ * a **stash** is an "object", a "configuration", or "setting" designed to be shared among many services
250
+ * an **event** is a "fact", "measurement", or "metric" announced by an arbitrary service, possibly related to some stash
251
+
252
+ Stashes and events are organized in two levels.
253
+
254
+ The top-level is the **organization**. Data from multiple
255
+ organizations is stored together but accessed separately by a running
256
+ Vayacondios server. An organization could be the name of a user,
257
+ workgroup, application, or service using Vayacondios.
258
+
259
+ The next level is the **topic**. Each topic within Vayacondios has a
260
+ single stash and can have multiple events. An "object" like a server,
261
+ a database, an application, a service, or a user maps to the concept
262
+ of "topic".
263
+
264
+ Every `wu` tool running within a deploy pack takes an additional
265
+ option `--vcd` which turns on or off interactions with Vayacondios.
266
+ This option can be specified at runtime on the command-line as well as
267
+ via a configuration file. When not running "in Vayacondios mode"
268
+ (when `--vcd` was not passed), interactions with Vayacondios will be
269
+ logged instead of transmitted and received.
270
+
271
+ ### Configuring Vayacondios access
272
+
273
+ If you don't intend to interact with a Vayacondios server, you can
274
+ just set `vcd` to `false` for your whole environment and skip this
275
+ section (as is done, for example, in the `test` environment by
276
+ default).
277
+
278
+ If you intend to interact with Vayacondios then you need to also
279
+ specify the `vcd_host` and `vcd_port` options which otherwise default
280
+ to the usual Vayacondios server port running on localhost.
281
+
282
+ ```yaml
283
+ ---
284
+ # in config/environments/production.yml
285
+ vcd_host: 10.123.123.123
286
+ vcd_port: 9000
287
+ ```
288
+
289
+ Vayacondios also requires that all events and stashes are stored under
290
+ a given organization name. The Vayacondios organization, which will
291
+ likely be shared across all environments of your application, is
292
+ usually set at the top-level:
293
+
294
+ ```yaml
295
+ ---
296
+ # in config/settings.yml
297
+ organization: my_company
298
+ ```
299
+
300
+ ### Handle out of band event data with Events
301
+
302
+ Despite being designed to be powerful and scalable, Vayacondios is not
303
+ the appropriate store for high-volume, high-throughput,
304
+ mission-critical data which must be persisted over the long-term.
305
+ Instead it should be used for "out of band" data, which is typically
306
+ much smaller in volume and throughput than the main body of a
307
+ dataflow. Examples of such out of band events include:
308
+
309
+ * signalling some intermittent or runtime error
310
+ * warning that some event was bad or suspicious
311
+ * logging an error
312
+ * registering some periodic metric
313
+ * signaling a change in state
314
+
315
+ Announcements can be made from anywhere within the Wukong framework by
316
+ accessing the `Wukong::Deploy.vayacondios_client` object but the most
317
+ common approach is to announce events within a processor or within a
318
+ dataflow.
319
+
320
+ #### Announcing from a processor
321
+
322
+ The `Wukong::Processor#announce` method can be used to directly send
323
+ an event to Vayacondios on a given topic.
324
+
325
+ ```ruby
326
+ Wukong.processor(:parser) do
327
+ def process line
328
+ yield parse!(line)
329
+ rescue ParseError => e
330
+ announce "parser.errors", line: line
331
+ end
332
+ end
333
+ ```
334
+
335
+ It's important when setting up an announcement like this that you
336
+ consider how often this piece of code will actually send events to
337
+ Vayacondios. If a `ParseError` is triggered once in every 10,000
338
+ lines, this may be perfectly fine to be running in production. If 1
339
+ in 10 lines causes a similar error, this may not be the right
340
+ approach.
341
+
342
+ #### Announcing from a dataflow
343
+
344
+ The `announce` processor can be used to announce all incoming
345
+ events to Vayacondios. Here's an example flow which makes use of it:
346
+
347
+ ```ruby
348
+ Wukong.dataflow(:parse_source) do
349
+ parser |
350
+ [
351
+ select(&:valid?) | ... | to_json,
352
+ select(&:invalid?) | announce(topic: "invalid_records")
353
+ ]
354
+ end
355
+ ```
356
+
357
+ Just as in the above example with a processor, it's important that the
358
+ flow through the announce processor is not incredibly high-volume.
359
+
360
+ The `announce` processor is terminal; it yields no output records.
361
+
362
+ ### Allow dynamic configuration with Stashes
363
+
364
+ The deploy pack inside a backend system like Hadoop or Storm can fetch
365
+ stashes from Vayacondios during runtime. Other systems external to
366
+ the deploy pack can simultaneously be writing data into these same
367
+ stashes in Vayacondios, allowing for a lightweight, two-way
368
+ communication stream between the deploy pack and arbitrary external
369
+ resources, mediated by a key-value store (the Vayacondios stash).
370
+
371
+ Stashes can be read and written from anywhere within the Wukong
372
+ framework by accessing the `Wukong::Deploy.vayacondios_client` object
373
+ but there are two special places where encapsulated, remote settings
374
+ are very useful.
375
+
376
+ #### Dynamic settings for the deploy pack itself
377
+
378
+ Each deploy pack, as an application, can fetch a stash of settings
379
+ from Vayacondios and use this at bootup time in the same way it uses a
380
+ configuration file on disk. All that is required is a Vayacondios
381
+ stash topic name. This is furnished by providing to the deploy pack
382
+ an `application` name in a configuration file, usually the top-level
383
+ one:
384
+
385
+ ```yaml
386
+ ---
387
+ # in config/settings.yml, for example
388
+
389
+ application: my_app
390
+ ```
391
+
392
+ When any `wu` tool is launched within the deploy pack with the `--vcd`
393
+ option (possibly set at an environment-wide level via a configuration
394
+ file) then remote settings from Vayacondios for the `application` will
395
+ be pulled at boot-time and merged into the local settings from
396
+ configuration files and the command-line.
397
+
398
+ #### Dynamic settings for processors
399
+
400
+ The processor `tokenizer` in the deploy pack with application name
401
+ `my_app` defaults to using the stash with topic
402
+ `processors.my_app-tokenizer` in Vayacondios to store its settings
403
+ (this can be changed by overriding the `Wukong::Processor#vcd_topic`
404
+ method).
405
+
406
+ These settings, if they exist, can be retrieved and merged into the
407
+ processor's current fields at any time using the
408
+ `Wukong::Processor#update_settings` method. A common use case is to want to
409
+ update a processor's fields every 30 seconds, or similar. This is
410
+ most easily accomplished via the
411
+ `Wukong::Processor#update_settings_every` method. Here's an example
412
+
413
+ ```ruby
414
+ Wukong.processor(:tagger) do
415
+ field :tags, Array, doc: "List of tags to check", default: []
416
+
417
+ def setup
418
+ update_settings_every(30)
419
+ end
420
+
421
+ def process record
422
+ tags.each do |tag|
423
+ ...
424
+ end
425
+ end
426
+ end
427
+ ```
428
+
429
+ The `tags` field of this processor will be updated every 30 seconds
430
+ with the latest values from Vayacondios.
431
+
432
+ The `Wukong::Processor#save_settings` and
433
+ `Wukong::Processor#save_settings_every` methods can be used to
434
+ save settings from a processor **to** Vayacondios.
@@ -1,17 +1,6 @@
1
1
  # = config/boot.rb
2
2
  #
3
3
  # Loads the Ruby language framework and activates Bundler.
4
- #
5
- # We can use a system-wide bundle installation _or_ a standalone
6
- # bundle installation. If a standalone installation exists (at
7
- # /bundle/bundler/setup.rb), we will detect and use it. Otherwise, we
8
- # fall back on the system-wide bundle installation.
9
- #
10
- # Make sure you install the bundle locally with the following command:
11
- #
12
- # @example Install a standalone bundle of gems
13
- #
14
- # $ bundle install --standalone
15
4
 
16
5
  local_setup_path = File.expand_path("../../bundle/bundler/setup.rb", __FILE__)
17
6
  if File.exist?(local_setup_path)
@@ -0,0 +1,5 @@
1
+ # EventMachine hard-codes a very inconvenient maximum length for a
2
+ # line -- 16K. This is too small for many use cases and is enlarged
3
+ # here to 16M.
4
+ EventMachine::Protocols::LineAndTextProtocol.send(:remove_const, :MaxLineLength)
5
+ EventMachine::Protocols::LineAndTextProtocol.send(:const_set, :MaxLineLength, 16*1024*1024)
@@ -35,5 +35,10 @@ a.out
35
35
  /doc/*
36
36
  /pkg/*
37
37
 
38
+ # bundler -- do NOT add Gemfile.lock to this list
38
39
  .bundle
39
40
  bundle/*
41
+
42
+ # configuration files that are dropped in outside of git
43
+ config/development.yml
44
+ config/deploy.yml
@@ -31,11 +31,11 @@ Gem::Specification.new do |gem|
31
31
  EOF
32
32
 
33
33
  gem.files = `git ls-files`.split("\n")
34
- gem.executables = ['wu-deploy','wu']
34
+ gem.executables = ['wu-deploy']
35
35
  gem.test_files = gem.files.grep(/^spec/)
36
36
  gem.require_paths = ['lib']
37
37
 
38
- gem.add_dependency('wukong', '3.0.1')
38
+ gem.add_dependency('wukong', '4.0.0')
39
39
  gem.add_dependency('rake', '~> 0.9')
40
40
  gem.add_dependency('erubis')
41
41
  gem.add_dependency('diffy')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong-deploy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2013-03-07 00:00:00.000000000 Z
14
+ date: 2014-03-19 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: wukong
@@ -20,7 +20,7 @@ dependencies:
20
20
  requirements:
21
21
  - - '='
22
22
  - !ruby/object:Gem::Version
23
- version: 3.0.1
23
+ version: 4.0.0
24
24
  type: :runtime
25
25
  prerelease: false
26
26
  version_requirements: !ruby/object:Gem::Requirement
@@ -28,7 +28,7 @@ dependencies:
28
28
  requirements:
29
29
  - - '='
30
30
  - !ruby/object:Gem::Version
31
- version: 3.0.1
31
+ version: 4.0.0
32
32
  - !ruby/object:Gem::Dependency
33
33
  name: rake
34
34
  requirement: !ruby/object:Gem::Requirement
@@ -91,7 +91,6 @@ description: ! " The Infochimps Platform is an end-to-end, managed solution for
91
91
  email: coders@infochimps.com
92
92
  executables:
93
93
  - wu-deploy
94
- - wu
95
94
  extensions: []
96
95
  extra_rdoc_files: []
97
96
  files:
@@ -102,17 +101,25 @@ files:
102
101
  - LICENSE.md
103
102
  - README.md
104
103
  - Rakefile
105
- - bin/wu
106
104
  - bin/wu-deploy
107
105
  - examples/.gitkeep
108
106
  - lib/wukong-deploy.rb
109
107
  - lib/wukong-deploy/console.rb
110
108
  - lib/wukong-deploy/deploy_pack.rb
111
109
  - lib/wukong-deploy/deploy_runner.rb
112
- - lib/wukong-deploy/extensions.rb
110
+ - lib/wukong-deploy/dump_extensions.rb
111
+ - lib/wukong-deploy/extensions/ftp_syncer.rb
113
112
  - lib/wukong-deploy/extensions/hadoop_invocation_override.rb
113
+ - lib/wukong-deploy/extensions/prepare_syncer.rb
114
+ - lib/wukong-deploy/extensions/s3_syncer.rb
115
+ - lib/wukong-deploy/extensions/storm_invocation_override.rb
116
+ - lib/wukong-deploy/extensions/uses_file_state.rb
117
+ - lib/wukong-deploy/extensions/uses_lockfile.rb
118
+ - lib/wukong-deploy/hadoop_extensions.rb
114
119
  - lib/wukong-deploy/rake_runner.rb
115
120
  - lib/wukong-deploy/repo.rb
121
+ - lib/wukong-deploy/storm_extensions.rb
122
+ - lib/wukong-deploy/sync_extensions.rb
116
123
  - lib/wukong-deploy/tasks.rb
117
124
  - lib/wukong-deploy/templater.rb
118
125
  - lib/wukong-deploy/templater/conflict_resolution.rb
@@ -131,6 +138,7 @@ files:
131
138
  - templates/config/environments/development.yml.erb
132
139
  - templates/config/environments/production.yml.erb
133
140
  - templates/config/environments/test.yml.erb
141
+ - templates/config/initializers/event_machine.rb.erb
134
142
  - templates/config/settings.yml.erb
135
143
  - templates/gitignore
136
144
  - templates/spec/spec_helper.rb.erb
@@ -150,7 +158,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
150
158
  version: '0'
151
159
  segments:
152
160
  - 0
153
- hash: 687970152159308186
161
+ hash: -3128770076158943322
154
162
  required_rubygems_version: !ruby/object:Gem::Requirement
155
163
  none: false
156
164
  requirements:
@@ -159,10 +167,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
159
167
  version: '0'
160
168
  segments:
161
169
  - 0
162
- hash: 687970152159308186
170
+ hash: -3128770076158943322
163
171
  requirements: []
164
172
  rubyforge_project:
165
- rubygems_version: 1.8.24
173
+ rubygems_version: 1.8.23
166
174
  signing_key:
167
175
  specification_version: 3
168
176
  summary: Defines the deploy pack framework used by the Infochimps Platform
data/bin/wu DELETED
@@ -1,35 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'shellwords'
3
- now=Time.now.strftime("%Y-%m-%d %H:%M:%S")
4
- if ARGV.empty?
5
- $stderr.puts "ERROR #{now} [wu ] -- Must provide a Wukong command to run. Try the --help option."
6
- else
7
- if ARGV.size == 1 && ARGV.first == '--help'
8
- $stderr.puts <<EOF
9
- usage: wu COMMAND [OPTIONS] [ARG] ...
10
-
11
- wu is a wrapper for easy use of Wukong's command-line tools. It takes
12
- your arguments, constructs the name of the proper wu-tool to call, and
13
- prepends a call to bundle exec.
14
-
15
- $ wu local ...
16
-
17
- is equivalent to
18
-
19
- $ bundle exec wu-local ...
20
-
21
- You can run any of the wu-tools like
22
-
23
- wu-local
24
- wu-hadoop
25
- wu-deploy
26
-
27
- EOF
28
- else
29
- if ARGV.first =~ /^-/
30
- $stderr.puts "ERROR ${now} [wu ] -- First argument must be the name of a wu tool to run, got <${1}>"
31
- else
32
- Kernel.exec "bundle exec wu-#{Shellwords.join(ARGV)}"
33
- end
34
- end
35
- end