wukong-deploy 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -57,3 +57,4 @@ away
57
57
  .rbx
58
58
  Gemfile.lock
59
59
  Backup*of*.numbers
60
+ *.gem
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source :rubygems
1
+ source 'https://rubygems.org'
2
2
 
3
3
  gemspec
4
4
 
@@ -26,8 +26,15 @@ module Wukong
26
26
  settings.define(:skip, :description => "Skip existing files", :type => :boolean, :default => false)
27
27
  settings.define(:force, :description => "Overwrite existing files", :type => :boolean, :default => false)
28
28
  when 'wu-hadoop'
29
- require_relative('wukong-deploy/extensions')
29
+ require_relative('wukong-deploy/hadoop_extensions')
30
30
  settings[:command_prefix] = 'bundle exec'
31
+ when 'wu-storm'
32
+ require_relative('wukong-deploy/storm_extensions')
33
+ settings[:command_prefix] = 'bundle exec'
34
+ when 'wu-dump'
35
+ require_relative('wukong-deploy/dump_extensions')
36
+ when 'wu-sync', 'wu-sync-all'
37
+ require_relative('wukong-deploy/sync_extensions')
31
38
  end
32
39
  end
33
40
 
@@ -1,5 +1,10 @@
1
1
  module Wukong
2
2
  module Deploy
3
+
4
+ # Return the name of this deploy pack.
5
+ def self.name
6
+ settings[:application]
7
+ end
3
8
 
4
9
  # Return the root directory of this deploy pack.
5
10
  #
@@ -58,11 +63,11 @@ module Wukong
58
63
  @root
59
64
  end
60
65
 
61
- # The directory this deploy pack keeps local, sample data in.
66
+ # The directory this deploy pack puts all its application code in.
62
67
  #
63
68
  # @return [Pathname]
64
- def self.data_dir
65
- root.join('data')
69
+ def self.app_dir
70
+ root.join('app')
66
71
  end
67
72
 
68
73
  # The directory this deploy pack uses for configuration files.
@@ -71,12 +76,19 @@ module Wukong
71
76
  def self.config_dir
72
77
  root.join('config')
73
78
  end
79
+
80
+ # The directory this deploy pack keeps local, sample data in.
81
+ #
82
+ # @return [Pathname]
83
+ def self.data_dir
84
+ root.join('data')
85
+ end
74
86
 
75
- # The directory this deploy pack uses for temporary files.
87
+ # The directory this deploy pack uses for lib files.
76
88
  #
77
89
  # @return [Pathname]
78
- def self.tmp_dir
79
- root.join('tmp')
90
+ def self.lib_dir
91
+ root.join('lib')
80
92
  end
81
93
 
82
94
  # The directory this deploy pack uses for logs.
@@ -86,11 +98,33 @@ module Wukong
86
98
  root.join('log')
87
99
  end
88
100
 
89
- # The diretory this deploy pack puts all its application code in.
101
+ # The directory this deploy pack puts all its scripts in.
90
102
  #
91
103
  # @return [Pathname]
92
- def self.app_dir
93
- root.join('app')
104
+ def self.script_dir
105
+ root.join('script')
106
+ end
107
+
108
+ # The directory this deploy pack puts all its Ruby unit tests in.
109
+ #
110
+ # @return [Pathname]
111
+ def self.spec_dir
112
+ root.join('spec')
113
+ end
114
+
115
+ # The directory this deploy pack puts all its non-Ruby source
116
+ # code in.
117
+ #
118
+ # @return [Pathname]
119
+ def self.src_dir
120
+ root.join('src')
121
+ end
122
+
123
+ # The directory this deploy pack uses for temporary files.
124
+ #
125
+ # @return [Pathname]
126
+ def self.tmp_dir
127
+ root.join('tmp')
94
128
  end
95
129
 
96
130
  private
@@ -148,9 +182,16 @@ module Wukong
148
182
  end
149
183
 
150
184
  # Read remote settings.
151
- #
152
- # FIXME -- not implemented yet.
153
185
  def self.read_remote_settings
186
+ return unless settings[:vcd]
187
+ if name.nil? || name.empty?
188
+ Wukong::Log.warn("Define the :application settings to give this deploy pack a name and be be able to read remote settings.")
189
+ return
190
+ end
191
+ topic = "deploy_packs.#{name}"
192
+ remote_settings = vayacondios_client.get(topic)
193
+ settings.merge(remote_settings) if remote_settings.success?
154
194
  end
195
+
155
196
  end
156
197
  end
@@ -0,0 +1,6 @@
1
+ require_relative('extensions/uses_lockfile')
2
+ require_relative('extensions/uses_file_state')
3
+ Wukong::Load::DirectoryDumper.class_eval do
4
+ include Wukong::Deploy::UsesLockfileOverride
5
+ include Wukong::Deploy::UsesFileStateOverride
6
+ end
@@ -0,0 +1,67 @@
1
+ module Wukong
2
+ module Deploy
3
+
4
+ # Attaches to the hooks provided by the Wukong::Load::Syncer class
5
+ # to write data to Vayacondios.
6
+ module FTPSyncerOverride
7
+
8
+ # Saves the syncer as a stash in Vayacondios.
9
+ def before_sync
10
+ super()
11
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 'ftp', self)
12
+ end
13
+
14
+ # Announces a successful sync and updates the last sync state
15
+ # and time.
16
+ def after_sync
17
+ super()
18
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
19
+ success: true,
20
+ step: 'ftp',
21
+ files: self.files,
22
+ }.tap { |e| e[:duration] = duration if duration })
23
+ now = Time.now.utc.to_i
24
+ last_ftp_sync_summary = { state: 1, time: now}
25
+ last_ftp_sync_summary[:data] = now unless self.files.empty?
26
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "ftp.last", last_ftp_sync_summary)
27
+ end
28
+
29
+ # Announces an error during a sync and updates the last sync
30
+ # state and time.
31
+ def on_error error
32
+ super(error)
33
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
34
+ success: false,
35
+ step: 'ftp',
36
+ error: error.class,
37
+ message: error.message,
38
+ files: self.files,
39
+ })
40
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "ftp.last", { state: 0, time: Time.now.utc.to_i })
41
+ end
42
+
43
+ # Returns the Vayacondios topic for this FTP syncer.
44
+ #
45
+ # @return [String] the Vayacondios topic
46
+ def vayacondios_topic
47
+ "listeners.#{name}"
48
+ end
49
+
50
+ # Returns a representation of this FTP syncer suitable for a
51
+ # Vayacondios stash.
52
+ #
53
+ # @return [Hash]
54
+ def to_vayacondios
55
+ {
56
+ protocol: settings[:protocol],
57
+ username: settings[:username],
58
+ password: (settings[:password] ? true : false),
59
+ host: settings[:host],
60
+ port: port,
61
+ path: settings[:path],
62
+ }
63
+ end
64
+
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,105 @@
1
+ module Wukong
2
+ module Deploy
3
+
4
+ # Attaches to the hooks provided by the Wukong::Load::Syncer class
5
+ # to write data to Vayacondios.
6
+ module PrepareSyncerOverride
7
+
8
+ # Saves the syncer as a stash in Vayacondios.
9
+ def before_sync
10
+ super()
11
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 'prepare', self)
12
+ end
13
+
14
+ # Announces a successful sync and updates the last sync state
15
+ # and time.
16
+ def after_sync
17
+ super()
18
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
19
+ success: success?,
20
+ step: 'prepare',
21
+ counts: counts,
22
+ files: files,
23
+ }.tap { |e| e[:duration] = duration if duration })
24
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "prepare.last", { state: (success? ? 1 : 0), time: Time.now.utc.to_i })
25
+ end
26
+
27
+ # Announces an error during a sync and updates the last sync
28
+ # state and time.
29
+ def on_error error
30
+ super(error)
31
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
32
+ success: false,
33
+ step: 'prepare',
34
+ error: error.class,
35
+ message: error.message,
36
+ counts: counts,
37
+ files: files,
38
+ })
39
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "prepare.last", { state: 0, time: Time.now.utc.to_i })
40
+ end
41
+
42
+ # Returns the Vayacondios topic for this PrepareSyncer.
43
+ #
44
+ # @return [String] the Vayacondios topic
45
+ def vayacondios_topic
46
+ "listeners.#{name}"
47
+ end
48
+
49
+ # Returns a representation of this PrepareSyncer suitable for a
50
+ # Vayacondios stash.
51
+ #
52
+ # @return [Hash]
53
+ def to_vayacondios
54
+ {
55
+ split: settings[:split],
56
+ lines: settings[:lines],
57
+ bytes: settings[:bytes],
58
+ ordered: settings[:ordered],
59
+ metadata: settings[:metadata],
60
+ }
61
+ end
62
+
63
+ module HandlerOverride
64
+
65
+ # The topic for this Handler.
66
+ #
67
+ # Delegates to PrepareSyncer#vayacondios_topic.
68
+ #
69
+ # @return [String]
70
+ def vayacondios_topic
71
+ syncer.vayacondios_topic
72
+ end
73
+
74
+ # Announce the file was processed.
75
+ #
76
+ # @param [Pathname] original
77
+ def after_process original
78
+ super(original)
79
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
80
+ success: true,
81
+ step: 'prepare',
82
+ path: fragment_for(original),
83
+ size: File.size(original),
84
+ })
85
+ end
86
+
87
+ # Announce an error in processing a file.
88
+ #
89
+ # @param [Pathname] original
90
+ # @param [Error] error
91
+ def on_error original, error
92
+ super(original, error)
93
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
94
+ success: false,
95
+ step: 'prepare',
96
+ path: fragment_for(original),
97
+ error: error.class,
98
+ message: error.message
99
+ })
100
+ end
101
+
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,59 @@
1
+ module Wukong
2
+ module Deploy
3
+
4
+ # Attaches to the hooks provided by the Wukong::Load::Syncer class
5
+ # to write data to Vayacondios.
6
+ module S3SyncerOverride
7
+
8
+ # Saves the syncer as a stash in Vayacondios.
9
+ def before_sync
10
+ super()
11
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, 's3', self)
12
+ end
13
+
14
+ # Announces a successful sync and updates the last sync state
15
+ # and time.
16
+ def after_sync
17
+ super()
18
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
19
+ success: true,
20
+ step: 's3',
21
+ bytes: (bytes || 0),
22
+ }.tap { |e| e[:duration] = duration if duration })
23
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "s3.last", { state: 1, time: Time.now.utc.to_i })
24
+ end
25
+
26
+ # Announces an error during a sync and updates the last sync
27
+ # state and time.
28
+ def on_error error
29
+ super(error)
30
+ Wukong::Deploy.vayacondios_client.announce(vayacondios_topic, {
31
+ success: false,
32
+ step: 's3',
33
+ error: error.class,
34
+ message: error.message,
35
+ bytes: bytes,
36
+ })
37
+ Wukong::Deploy.vayacondios_client.set(vayacondios_topic, "s3.last", { state: 0, time: Time.now.utc.to_i })
38
+ end
39
+
40
+ # Returns the Vayacondios topic for this S3Syncer.
41
+ #
42
+ # @return [String] the Vayacondios topic
43
+ def vayacondios_topic
44
+ "listeners.#{name}"
45
+ end
46
+
47
+ # Returns a representation of this S3Syncer suitable for a
48
+ # Vayacondios stash.
49
+ #
50
+ # @return [Hash]
51
+ def to_vayacondios
52
+ {
53
+ bucket: s3_uri,
54
+ region: settings[:region],
55
+ }
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,13 @@
1
+ module Wukong
2
+ module Deploy
3
+ module StormInvocationOverride
4
+ def params_to_pass
5
+ Deploy.pre_deploy_settings
6
+ end
7
+ end
8
+ end
9
+ end
10
+
11
+
12
+
13
+
@@ -0,0 +1,17 @@
1
+ module Wukong
2
+ module Deploy
3
+
4
+ # Makes `UsesFileState#file_state_dir` default to the
5
+ # `Wukong::Deploy.tmp_dir`.
6
+ module UsesFileStateOverride
7
+
8
+ # Overrides the default value of #file_state_dir with
9
+ # Wukong::Deploy.tmp_dir.
10
+ #
11
+ # @return [String]
12
+ def file_state_dir
13
+ Wukong::Deploy.tmp_dir.to_s
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,17 @@
1
+ module Wukong
2
+ module Deploy
3
+
4
+ # Makes `UsesLockfile#lockfile_dir` default to the
5
+ # `Wukong::Deploy.tmp_dir`.
6
+ module UsesLockfileOverride
7
+
8
+ # Overrides the default value of #lockfile_dir with
9
+ # Wukong::Deploy.tmp_dir.
10
+ #
11
+ # @return [String]
12
+ def lockfile_dir
13
+ Wukong::Deploy.tmp_dir.to_s
14
+ end
15
+ end
16
+ end
17
+ end
@@ -68,6 +68,8 @@ module Wukong
68
68
  config/environments/test.yml
69
69
  config/environments/production.yml
70
70
 
71
+ config/initializers/event_machine.rb
72
+
71
73
  Gemfile
72
74
  Rakefile
73
75
  README.md
@@ -0,0 +1,2 @@
1
+ require_relative('extensions/storm_invocation_override')
2
+ Wukong::Storm::StormRunner.class_eval { include Wukong::Deploy::StormInvocationOverride }
@@ -0,0 +1,27 @@
1
+ require_relative('extensions/uses_lockfile')
2
+ require_relative('extensions/uses_file_state')
3
+ require_relative('extensions/ftp_syncer')
4
+ require_relative('extensions/prepare_syncer')
5
+ require_relative('extensions/s3_syncer')
6
+
7
+ Wukong::Load::SyncRunner.class_eval do
8
+ include Wukong::Deploy::UsesLockfileOverride
9
+ end
10
+ if defined?(Wukong::Load::SyncAllRunner)
11
+ Wukong::Load::SyncAllRunner.class_eval do
12
+ include Wukong::Deploy::UsesLockfileOverride
13
+ end
14
+ end
15
+ Wukong::Load::FTPSyncer.class_eval do
16
+ include Wukong::Deploy::FTPSyncerOverride
17
+ end
18
+ Wukong::Load::PrepareSyncer.class_eval do
19
+ include Wukong::Deploy::UsesFileStateOverride
20
+ include Wukong::Deploy::PrepareSyncerOverride
21
+ end
22
+ Wukong::Load::PrepareSyncer::Handler.class_eval do
23
+ include Wukong::Deploy::PrepareSyncerOverride::HandlerOverride
24
+ end
25
+ Wukong::Load::S3Syncer.class_eval do
26
+ include Wukong::Deploy::S3SyncerOverride
27
+ end
@@ -1,7 +1,12 @@
1
1
  require 'rake'
2
2
 
3
+ require 'wukong/rake_helper'
4
+
3
5
  require 'yard'
4
- YARD::Rake::YardocTask.new(:doc)
6
+ YARD::Rake::YardocTask.new(:doc) do |t|
7
+ t.files = %w[app/**/*.rb Rakefile Gemfile]
8
+ t.options = ['--load', Wukong.doc_helpers_path]
9
+ end
5
10
 
6
11
  require 'rspec/core/rake_task'
7
12
  RSpec::Core::RakeTask.new(:spec)
@@ -1,5 +1,5 @@
1
1
  module Wukong
2
2
  module Deploy
3
- VERSION = '0.1.1'
3
+ VERSION = '0.2.0'
4
4
  end
5
5
  end
@@ -1,5 +1,5 @@
1
- # Use the public [RubyGems server](http://rubygems.org/)
2
- source :rubygems
1
+ # Use the public [RubyGems server](https://rubygems.org/)
2
+ source 'https://rubygems.org'
3
3
 
4
4
  # = Application
5
5
  #
@@ -12,7 +12,7 @@ source :rubygems
12
12
  #
13
13
  # here and then remember to run
14
14
  #
15
- # $ bundle install --standalone
15
+ # $ bundle install
16
16
  #
17
17
  # Here's a link to the [Bundler documentation](http://gembundler.com/)
18
18
  # for your convenience.
@@ -46,6 +46,7 @@ gem 'wonderdog', '0.1.0'
46
46
  # Provides
47
47
  #
48
48
  # - wu-storm
49
+ # - wu-bolt
49
50
  gem 'wukong-storm', '0.1.0'
50
51
 
51
52
  # Lets you load JSON blobs into data stores.
@@ -56,13 +57,50 @@ gem 'wukong-storm', '0.1.0'
56
57
  # - wu-source
57
58
  gem 'wukong-load', '0.1.0'
58
59
 
59
- # Framework for gluing code together.
60
+ # If you want to use wu-load with certain databases you may have to
61
+ # enable the various gems required for the database(s) you're
62
+ # interested in here.
63
+ group :load do
64
+
65
+ #
66
+ # Already required by one or more Wukong plugins:
67
+ #
68
+ # - Kafka
69
+ #
70
+ # Not required because uses Ruby standard library to connect:
71
+ #
72
+ # - ElasticSearch
73
+ #
74
+
75
+ # Enable MongoDB
76
+ #
77
+ # gem 'mongo'
78
+ # gem 'bson_ext'
79
+
80
+ # Enable MySQL
81
+ #
82
+ # gem 'mysql2'
83
+ end
84
+
85
+ # Framework for gluing Wukong plugins together into a deploy pack.
60
86
  #
61
87
  # Provides
62
88
  #
63
89
  # - wu-deploy
64
90
  gem 'wukong-deploy', '0.1.0'
65
91
 
92
+ # Provides metadata about a deploy pack.
93
+ #
94
+ # Provides
95
+ #
96
+ # - wu-meta
97
+ #
98
+ gem 'wukong-meta', '0.1.0'
99
+
100
+ # Integrates Wukong processors with Vayacondios.
101
+ #
102
+ gem 'wukong-vayacondios', '0.1.0'
103
+
66
104
  group :development do
67
105
  gem 'yard'
68
106
  gem 'redcarpet'
@@ -94,55 +94,84 @@ want to run `rbenv exec bundle install`.
94
94
 
95
95
  ### Configuration
96
96
 
97
- Your deploy pack doesn't need any configuration out of the box. As
98
- you begin to extend it you may add functionality which benefits from
99
- the ability to be configured.
97
+ #### Configuring the Environment
100
98
 
101
- Put any configuration you want shared across all environments into the
102
- file `config/settings.yml`. Override this with environment-specific
103
- configuration in the appropriate file within `config/environments`.
99
+ Before any of the `wu` programs can run, the Ruby process must first
100
+ boot up, require Wukong and all necessary dependencies (such as
101
+ 'event-machine') and plugins (such as the deploy pack plugin
102
+ `wukong-deploy`), and then hand over control to the `wu` program.
104
103
 
105
- As an example, you may write a processor like this:
104
+ The following Ruby files are loaded in order. Each file is
105
+ responsible for configuring some part of this runtime environment:
106
+
107
+ 1. `config/environment` -- requires the rest of the files and adds any additional environmental code
108
+ 2. `config/application` -- defines the load order of external libraries, Wukong plugins, and application code
109
+ 3. `config/boot` -- defines how and where the Ruby process will look for code dependencies (through Bundler)
110
+ 4. `config/initializers/*.rb` -- non-Wukong configuration for external libraries or application code can live here
111
+
112
+ #### Configuring the Application
113
+
114
+ The application a given deploy pack is running can be configured at
115
+ several different layers.
116
+
117
+ The simplest layer is settings passed to `wu` programs on the
118
+ command-line. These settings have the highest precedence and will
119
+ always be read.
120
+
121
+ When booting any of the `wu` tools the deploy pack will also read and
122
+ merge settings from the following configuration files, in order of
123
+ **increasing** precedence:
124
+
125
+ 1. `config/settings.yml`
126
+ 2. `config/settings/*.yml` if present, without any guarantee as to order
127
+ 3. `config/environments/[environment].yml`
128
+ 4. `config/environments/[environment]/*.yml` if present, without any guarantee as to order
129
+ 5. `config/deploy.yml` if present (this file should be ignored by version control)
130
+ 6. `config/environments/deploy-[environment].yml` if present (this file should be ignored by version control)
131
+
132
+ Finally, if interaction with Vayacondios is turned on, settings will
133
+ also be read from a Vayacondios stash (see the <a
134
+ href="#vayacondios">Vayacondios section</a> below).
135
+
136
+ Completely merged and resolved configuration settings are accessible
137
+ globally (once the Wukong framework has been booted) via the
138
+ `Wukong::Deploy.settings` object. Any piece of code in a model,
139
+ processor, dataflow, or elsewhere can read and write to this object.
140
+
141
+ Processors will *automatically* read settings for their fields from a
142
+ subhash within this global settings object. Given a processor like
106
143
 
107
144
  ```ruby
108
- Wukong.procesor(:configurable_decorator) do
109
- field :suffix, String, :default => '.'
110
- def process record
111
- yield [record, suffix].join
145
+ Wukong.processor(:tokenizer) do
146
+ field :min_length, Integer, default: 2
147
+ def process line
148
+ ...
112
149
  end
113
150
  end
114
151
  ```
115
152
 
116
- This processor's `suffix` property can be set on the command-line:
117
-
118
- ```
119
- $ cat input
120
- 1
121
- 2
122
- 3
123
- $ cat input | wu-local configurable_decorator
124
- 1.
125
- 2.
126
- 3.
127
- $ cat input | wu-local configurable_decorator --suffix=','
128
- 1,
129
- 2,
130
- 3,
131
-
132
- You can also set the same property in a configuration file, scoped by
133
- the name of the processor:
153
+ you can override the value of its `min_length` field by putting
154
+ the following section into any one of the configuration files above:
134
155
 
135
156
  ```yaml
136
- # in config/settings.yml
137
157
  ---
158
+ # in config/settings.yml, for example
138
159
 
139
- configurable_decorator:
140
- suffix: ,
160
+ tokenizer:
161
+ min_length: 5
141
162
  ```
142
163
 
143
- which lets you the `--suffix` flag on the command-line while still
144
- overriding the default setting. You can also put such settings in
145
- environment specific files within `config/environments`.
164
+ which would now make the command
165
+
166
+ ```
167
+ $ cat corpus.txt | wu local tokenizer
168
+ ```
169
+
170
+ have the same effect as
171
+
172
+ ```
173
+ $ cat corpus.txt | wu local tokenizer --min_length=5
174
+ ```
146
175
 
147
176
  ## File Structure
148
177
 
@@ -206,3 +235,200 @@ the underlying documentation for Wukong and its plugins, specifically:
206
235
 
207
236
  * on [Wukong](http://github.com/infochimps-labs/wukong/tree/3.0.0) so you understand the basic idea of a processor and how to glue processors together
208
237
  * on [Wukong-Hadoop](http://github.com/infochimps-labs/wukong-hadoop) so you understand how to move between local and Hadoop modes for batch analytics
238
+
239
+
240
+ <a name="vayacondios"></a>
241
+ ## Interacting with Vayacondios
242
+
243
+ [Vayacondios](http://github.com/infochimps-labs/vayacondios) is a
244
+ program which makes it easy to for clients to announce events or read
245
+ and write settings to and from a central server.
246
+
247
+ The basic objects of Vayacondios are the **stash** and the **event**:
248
+
249
+ * a **stash** is an "object", a "configuration", or "setting" designed to be shared among many services
250
+ * an **event** is a "fact", "measurement", or "metric" announced by an arbitrary service, possibly related to some stash
251
+
252
+ Stashes and events are organized in two levels.
253
+
254
+ The top-level is the **organization**. Data from multiple
255
+ organizations is stored together but accessed separately by a running
256
+ Vayacondios server. An organization could be the name of a user,
257
+ workgroup, application, or service using Vayacondios.
258
+
259
+ The next level is the **topic**. Each topic within Vayacondios has a
260
+ single stash and can have multiple events. An "object" like a server,
261
+ a database, an application, a service, or a user maps to the concept
262
+ of "topic".
263
+
264
+ Every `wu` tool running within a deploy pack takes an additional
265
+ option `--vcd` which turns on or off interactions with Vayacondios.
266
+ This option can be specified at runtime on the command-line as well as
267
+ via a configuration file. When not running "in Vayacondios mode"
268
+ (when `--vcd` was not passed), interactions with Vayacondios will be
269
+ logged instead of transmitted and received.
270
+
271
+ ### Configuring Vayacondios access
272
+
273
+ If you don't intend to interact with a Vayacondios server, you can
274
+ just set `vcd` to `false` for your whole environment and skip this
275
+ section (as is done, for example, in the `test` environment by
276
+ default).
277
+
278
+ If you intend to interact with Vayacondios then you need to also
279
+ specify the `vcd_host` and `vcd_port` options which otherwise default
280
+ to the usual Vayacondios server port running on localhost.
281
+
282
+ ```yaml
283
+ ---
284
+ # in config/environments/production.yml
285
+ vcd_host: 10.123.123.123
286
+ vcd_port: 9000
287
+ ```
288
+
289
+ Vayacondios also requires that all events and stashes are stored under
290
+ a given organization name. The Vayacondios organization, which will
291
+ likely be shared across all environments of your application, is
292
+ usually set at the top-level:
293
+
294
+ ```yaml
295
+ ---
296
+ # in config/settings.yml
297
+ organization: my_company
298
+ ```
299
+
300
+ ### Handle out of band event data with Events
301
+
302
+ Despite being designed to be powerful and scalable, Vayacondios is not
303
+ the appropriate store for high-volume, high-throughput,
304
+ mission-critical data which must be persisted over the long-term.
305
+ Instead it should be used for "out of band" data, which is typically
306
+ much smaller in volume and throughput than the main body of a
307
+ dataflow. Examples of such out of band events include:
308
+
309
+ * signalling some intermittent or runtime error
310
+ * warning that some event was bad or suspicious
311
+ * logging an error
312
+ * registering some periodic metric
313
+ * signaling a change in state
314
+
315
+ Announcements can be made from anywhere within the Wukong framework by
316
+ accessing the `Wukong::Deploy.vayacondios_client` object but the most
317
+ common approach is to announce events within a processor or within a
318
+ dataflow.
319
+
320
+ #### Announcing from a processor
321
+
322
+ The `Wukong::Processor#announce` method can be used to directly send
323
+ an event to Vayacondios on a given topic.
324
+
325
+ ```ruby
326
+ Wukong.processor(:parser) do
327
+ def process line
328
+ yield parse!(line)
329
+ rescue ParseError => e
330
+ announce "parser.errors", line: line
331
+ end
332
+ end
333
+ ```
334
+
335
+ It's important when setting up an announcement like this that you
336
+ consider how often this piece of code will actually send events to
337
+ Vayacondios. If a `ParseError` is triggered once in every 10,000
338
+ lines, this may be perfectly fine to be running in production. If 1
339
+ in 10 lines causes a similar error, this may not be the right
340
+ approach.
341
+
342
+ #### Announcing from a dataflow
343
+
344
+ The `announce` processor can be used to announce all incoming
345
+ events to Vayacondios. Here's an example flow which makes use of it:
346
+
347
+ ```ruby
348
+ Wukong.dataflow(:parse_source) do
349
+ parser |
350
+ [
351
+ select(&:valid?) | ... | to_json,
352
+ select(&:invalid?) | announce(topic: "invalid_records")
353
+ ]
354
+ end
355
+ ```
356
+
357
+ Just as in the above example with a processor, it's important that the
358
+ flow through the announce processor is not incredibly high-volume.
359
+
360
+ The `announce` processor is terminal; it yields no output records.
361
+
362
+ ### Allow dynamic configuration with Stashes
363
+
364
+ The deploy pack inside a backend system like Hadoop or Storm can fetch
365
+ stashes from Vayacondios during runtime. Other systems external to
366
+ the deploy pack can simultaneously be writing data into these same
367
+ stashes in Vayacondios, allowing for a lightweight, two-way
368
+ communication stream between the deploy pack and arbitrary external
369
+ resources, mediated by a key-value store (the Vayacondios stash).
370
+
371
+ Stashes can be read and written from anywhere within the Wukong
372
+ framework by accessing the `Wukong::Deploy.vayacondios_client` object
373
+ but there are two special places where encapsulated, remote settings
374
+ are very useful.
375
+
376
+ #### Dynamic settings for the deploy pack itself
377
+
378
+ Each deploy pack, as an application, can fetch a stash of settings
379
+ from Vayacondios and use this at bootup time in the same way it uses a
380
+ configuration file on disk. All that is required is a Vayacondios
381
+ stash topic name. This is furnished by providing to the deploy pack
382
+ an `application` name in a configuration file, usually the top-level
383
+ one:
384
+
385
+ ```yaml
386
+ ---
387
+ # in config/settings.yml, for example
388
+
389
+ application: my_app
390
+ ```
391
+
392
+ When any `wu` tool is launched within the deploy pack with the `--vcd`
393
+ option (possibly set at an environment-wide level via a configuration
394
+ file) then remote settings from Vayacondios for the `application` will
395
+ be pulled at boot-time and merged into the local settings from
396
+ configuration files and the command-line.
397
+
398
+ #### Dynamic settings for processors
399
+
400
+ The processor `tokenizer` in the deploy pack with application name
401
+ `my_app` defaults to using the stash with topic
402
+ `processors.my_app-tokenizer` in Vayacondios to store its settings
403
+ (this can be changed by overriding the `Wukong::Processor#vcd_topic`
404
+ method).
405
+
406
+ These settings, if they exist, can be retrieved and merged into the
407
+ processor's current fields at any time using the
408
+ `Wukong::Processor#update_settings`. A common use case is to want to
409
+ update a processor's fields every 30 seconds, or similar. This is
410
+ most easily accomplished via the
411
+ `Wukong::Processor#update_settings_every` method. Here's an example
412
+
413
+ ```ruby
414
+ Wukong.processor(:tagger) do
415
+ field :tags, Array, doc: "List of tags to check", default: []
416
+
417
+ def setup
418
+ update_settings_every(30)
419
+ end
420
+
421
+ def process record
422
+ tags.each do |tag|
423
+ ...
424
+ end
425
+ end
426
+ end
427
+ ```
428
+
429
+ The `tags` field of this processor will be updated every 30 seconds
430
+ with the latest values from Vayacondios.
431
+
432
+ The `Wukong::Processor#save_settings` and
433
+ `Wukong::Processor#save_settings_every` methods can be used to
434
+ save settings from a processor **to** Vayacondios.
@@ -1,17 +1,6 @@
1
1
  # = config/boot.rb
2
2
  #
3
3
  # Loads the Ruby language framework and activates Bundler.
4
- #
5
- # We can use a system-wide bundle installation _or_ a standalone
6
- # bundle installation. If a standalone installation exists (at
7
- # /bundle/bundler/setup.rb), we will detect and use it. Otherwise, we
8
- # fall back on the system-wide bundle installation.
9
- #
10
- # Make sure you install the bundle locally with the following command:
11
- #
12
- # @example Install a standalone bundle of gems
13
- #
14
- # $ bundle install --standalone
15
4
 
16
5
  local_setup_path = File.expand_path("../../bundle/bundler/setup.rb", __FILE__)
17
6
  if File.exist?(local_setup_path)
@@ -0,0 +1,5 @@
1
+ # EventMachine hard-codes a very inconvenient maximum length for a
2
+ # line -- 16K. This is too small for many use cases and is enlarged
3
+ # here to 16M.
4
+ EventMachine::Protocols::LineAndTextProtocol.send(:remove_const, :MaxLineLength)
5
+ EventMachine::Protocols::LineAndTextProtocol.send(:const_set, :MaxLineLength, 16*1024*1024)
@@ -35,5 +35,10 @@ a.out
35
35
  /doc/*
36
36
  /pkg/*
37
37
 
38
+ # bundler -- do NOT add Gemfile.lock to this list
38
39
  .bundle
39
40
  bundle/*
41
+
42
+ # configuration files that are dropped in outside of git
43
+ config/development.yml
44
+ config/deploy.yml
@@ -31,11 +31,11 @@ Gem::Specification.new do |gem|
31
31
  EOF
32
32
 
33
33
  gem.files = `git ls-files`.split("\n")
34
- gem.executables = ['wu-deploy','wu']
34
+ gem.executables = ['wu-deploy']
35
35
  gem.test_files = gem.files.grep(/^spec/)
36
36
  gem.require_paths = ['lib']
37
37
 
38
- gem.add_dependency('wukong', '3.0.1')
38
+ gem.add_dependency('wukong', '4.0.0')
39
39
  gem.add_dependency('rake', '~> 0.9')
40
40
  gem.add_dependency('erubis')
41
41
  gem.add_dependency('diffy')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wukong-deploy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2013-03-07 00:00:00.000000000 Z
14
+ date: 2014-03-19 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: wukong
@@ -20,7 +20,7 @@ dependencies:
20
20
  requirements:
21
21
  - - '='
22
22
  - !ruby/object:Gem::Version
23
- version: 3.0.1
23
+ version: 4.0.0
24
24
  type: :runtime
25
25
  prerelease: false
26
26
  version_requirements: !ruby/object:Gem::Requirement
@@ -28,7 +28,7 @@ dependencies:
28
28
  requirements:
29
29
  - - '='
30
30
  - !ruby/object:Gem::Version
31
- version: 3.0.1
31
+ version: 4.0.0
32
32
  - !ruby/object:Gem::Dependency
33
33
  name: rake
34
34
  requirement: !ruby/object:Gem::Requirement
@@ -91,7 +91,6 @@ description: ! " The Infochimps Platform is an end-to-end, managed solution for
91
91
  email: coders@infochimps.com
92
92
  executables:
93
93
  - wu-deploy
94
- - wu
95
94
  extensions: []
96
95
  extra_rdoc_files: []
97
96
  files:
@@ -102,17 +101,25 @@ files:
102
101
  - LICENSE.md
103
102
  - README.md
104
103
  - Rakefile
105
- - bin/wu
106
104
  - bin/wu-deploy
107
105
  - examples/.gitkeep
108
106
  - lib/wukong-deploy.rb
109
107
  - lib/wukong-deploy/console.rb
110
108
  - lib/wukong-deploy/deploy_pack.rb
111
109
  - lib/wukong-deploy/deploy_runner.rb
112
- - lib/wukong-deploy/extensions.rb
110
+ - lib/wukong-deploy/dump_extensions.rb
111
+ - lib/wukong-deploy/extensions/ftp_syncer.rb
113
112
  - lib/wukong-deploy/extensions/hadoop_invocation_override.rb
113
+ - lib/wukong-deploy/extensions/prepare_syncer.rb
114
+ - lib/wukong-deploy/extensions/s3_syncer.rb
115
+ - lib/wukong-deploy/extensions/storm_invocation_override.rb
116
+ - lib/wukong-deploy/extensions/uses_file_state.rb
117
+ - lib/wukong-deploy/extensions/uses_lockfile.rb
118
+ - lib/wukong-deploy/hadoop_extensions.rb
114
119
  - lib/wukong-deploy/rake_runner.rb
115
120
  - lib/wukong-deploy/repo.rb
121
+ - lib/wukong-deploy/storm_extensions.rb
122
+ - lib/wukong-deploy/sync_extensions.rb
116
123
  - lib/wukong-deploy/tasks.rb
117
124
  - lib/wukong-deploy/templater.rb
118
125
  - lib/wukong-deploy/templater/conflict_resolution.rb
@@ -131,6 +138,7 @@ files:
131
138
  - templates/config/environments/development.yml.erb
132
139
  - templates/config/environments/production.yml.erb
133
140
  - templates/config/environments/test.yml.erb
141
+ - templates/config/initializers/event_machine.rb.erb
134
142
  - templates/config/settings.yml.erb
135
143
  - templates/gitignore
136
144
  - templates/spec/spec_helper.rb.erb
@@ -150,7 +158,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
150
158
  version: '0'
151
159
  segments:
152
160
  - 0
153
- hash: 687970152159308186
161
+ hash: -3128770076158943322
154
162
  required_rubygems_version: !ruby/object:Gem::Requirement
155
163
  none: false
156
164
  requirements:
@@ -159,10 +167,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
159
167
  version: '0'
160
168
  segments:
161
169
  - 0
162
- hash: 687970152159308186
170
+ hash: -3128770076158943322
163
171
  requirements: []
164
172
  rubyforge_project:
165
- rubygems_version: 1.8.24
173
+ rubygems_version: 1.8.23
166
174
  signing_key:
167
175
  specification_version: 3
168
176
  summary: Defines the deploy pack framework used by the Infochimps Platform
data/bin/wu DELETED
@@ -1,35 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'shellwords'
3
- now=Time.now.strftime("%Y-%m-%d %H:%M:%S")
4
- if ARGV.empty?
5
- $stderr.puts "ERROR #{now} [wu ] -- Must provide a Wukong command to run. Try the --help option."
6
- else
7
- if ARGV.size == 1 && ARGV.first == '--help'
8
- $stderr.puts <<EOF
9
- usage: wu COMMAND [OPTIONS] [ARG] ...
10
-
11
- wu is a wrapper for easy use of Wukong's command-line tools. It takes
12
- your arguments, constructs the name of the proper wu-tool to call, and
13
- prepends a call to bundle exec.
14
-
15
- $ wu local ...
16
-
17
- is equivalent to
18
-
19
- $ bundle exec wu-local ...
20
-
21
- You can run any of the wu-tools like
22
-
23
- wu-local
24
- wu-hadoop
25
- wu-deploy
26
-
27
- EOF
28
- else
29
- if ARGV.first =~ /^-/
30
- $stderr.puts "ERROR ${now} [wu ] -- First argument must be the name of a wu tool to run, got <${1}>"
31
- else
32
- Kernel.exec "bundle exec wu-#{Shellwords.join(ARGV)}"
33
- end
34
- end
35
- end