wukong-deploy 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +59 -0
- data/.rspec +2 -0
- data/Gemfile +3 -0
- data/README.md +94 -0
- data/Rakefile +10 -0
- data/bin/wu-deploy +7 -0
- data/lib/wukong-deploy.rb +26 -0
- data/lib/wukong-deploy/configuration.rb +23 -0
- data/lib/wukong-deploy/console.rb +15 -0
- data/lib/wukong-deploy/deploy_pack.rb +151 -0
- data/lib/wukong-deploy/driver.rb +41 -0
- data/lib/wukong-deploy/repo.rb +66 -0
- data/lib/wukong-deploy/tasks.rb +13 -0
- data/lib/wukong-deploy/templater.rb +65 -0
- data/lib/wukong-deploy/version.rb +7 -0
- data/templates/Gemfile.erb +40 -0
- data/templates/README.md.erb +1 -0
- data/templates/Rakefile.erb +10 -0
- data/templates/config/application.rb.erb +66 -0
- data/templates/config/boot.rb.erb +21 -0
- data/templates/config/environment.rb.erb +8 -0
- data/templates/config/environments/development.yml.erb +8 -0
- data/templates/config/environments/production.yml.erb +10 -0
- data/templates/config/environments/test.yml.erb +8 -0
- data/templates/config/settings.yml.erb +54 -0
- data/templates/gitignore +39 -0
- data/templates/spec/spec_helper.rb.erb +13 -0
- data/wukong-deploy.gemspec +47 -0
- metadata +183 -0
data/.gitignore
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
## OS
|
2
|
+
.DS_Store
|
3
|
+
Icon
|
4
|
+
nohup.out
|
5
|
+
.bak
|
6
|
+
|
7
|
+
*.pem
|
8
|
+
|
9
|
+
## EDITORS
|
10
|
+
\#*
|
11
|
+
.\#*
|
12
|
+
\#*\#
|
13
|
+
*~
|
14
|
+
*.swp
|
15
|
+
REVISION
|
16
|
+
TAGS*
|
17
|
+
tmtags
|
18
|
+
*_flymake.*
|
19
|
+
*_flymake
|
20
|
+
*.tmproj
|
21
|
+
.project
|
22
|
+
.settings
|
23
|
+
|
24
|
+
## COMPILED
|
25
|
+
a.out
|
26
|
+
*.o
|
27
|
+
*.pyc
|
28
|
+
*.so
|
29
|
+
|
30
|
+
## OTHER SCM
|
31
|
+
.bzr
|
32
|
+
.hg
|
33
|
+
.svn
|
34
|
+
|
35
|
+
## PROJECT::GENERAL
|
36
|
+
|
37
|
+
log/*
|
38
|
+
tmp/*
|
39
|
+
pkg/*
|
40
|
+
|
41
|
+
coverage
|
42
|
+
rdoc
|
43
|
+
doc
|
44
|
+
pkg
|
45
|
+
.rake_test_cache
|
46
|
+
.bundle
|
47
|
+
.yardoc
|
48
|
+
|
49
|
+
.vendor
|
50
|
+
|
51
|
+
## PROJECT::SPECIFIC
|
52
|
+
|
53
|
+
old/*
|
54
|
+
docpages
|
55
|
+
away
|
56
|
+
|
57
|
+
.rbx
|
58
|
+
Gemfile.lock
|
59
|
+
Backup*of*.numbers
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# Wukong Deploy Pack
|
2
|
+
|
3
|
+
The [Infochimps Platform](http://www.infochimps.com) is an end-to-end,
|
4
|
+
managed solution for building Big Data applications. It integrates
|
5
|
+
best-of-breed technologies like [Hadoop](http://hadoop.apache.org/),
|
6
|
+
[Storm](https://github.com/nathanmarz/storm),
|
7
|
+
[Kafka](http://incubator.apache.org/kafka/),
|
8
|
+
[MongoDB](http://www.mongodb.org/),
|
9
|
+
[ElasticSearch](http://www.elasticsearch.org/),
|
10
|
+
[HBase](http://hbase.apache.org/), &c. and provides simple interfaces
|
11
|
+
for accessing these powerful tools.
|
12
|
+
|
13
|
+
Computation, analytics, scripting, &c. are all handled by
|
14
|
+
[Wukong](http://github.com/infochimps-labs/wukong) within the
|
15
|
+
platform. Wukong is an abstract framework for defining computations
|
16
|
+
on data. Wukong processors and flows can run in many different
|
17
|
+
execution contexts including:
|
18
|
+
|
19
|
+
* locally on the command-line for testing or development purposes
|
20
|
+
* as a Hadoop mapper or reducer for batch analytics or ETL
|
21
|
+
* within Storm as part of a real-time data flow
|
22
|
+
|
23
|
+
The Infochimps Platform uses the concept of a deploy pack for
|
24
|
+
developers to develop all their processors, flows, and jobs within.
|
25
|
+
The deploy pack can be thought of as a container for all the necessary
|
26
|
+
Wukong code and plugins useful in the context of an Infochimps
|
27
|
+
Platform application. It includes the following libraries:
|
28
|
+
|
29
|
+
* <a href="http://github.com/infochimps-labs/wukong">wukong</a>: The core framework for writing processors and chaining them together.
|
30
|
+
* <a href="http://github.com/infochimps-labs/wukong-hadoop">wukong-hadoop</a>: Run Wukong processors as mappers and reducers within the Hadoop framework. Model Hadoop jobs locally before you run them.
|
31
|
+
* <a href="http://github.com/infochimps-labs/wonderdog">wonderdog</a>: Connect Wukong processors running within Hadoop to Elasticsearch as either a source or sink for data.
|
32
|
+
|
33
|
+
## Installation
|
34
|
+
|
35
|
+
The deploy pack is installed as a RubyGem:
|
36
|
+
|
37
|
+
```
|
38
|
+
$ sudo gem install wukong-deploy
|
39
|
+
```
|
40
|
+
|
41
|
+
## File Structure
|
42
|
+
|
43
|
+
A deploy pack is a repository with the following
|
44
|
+
[Rails](http://rubyonrails.org/)-like file structure:
|
45
|
+
|
46
|
+
```
|
47
|
+
├── app
|
48
|
+
│ ├── models
|
49
|
+
│ ├── processors
|
50
|
+
│ ├── flows
|
51
|
+
│ └── jobs
|
52
|
+
├── config
|
53
|
+
│ ├── environment.rb
|
54
|
+
│ ├── application.rb
|
55
|
+
│ ├── initializers
|
56
|
+
│ ├── settings.yml
|
57
|
+
│ └── environments
|
58
|
+
│ ├── development.yml
|
59
|
+
│ ├── production.yml
|
60
|
+
│ └── test.yml
|
61
|
+
├── data
|
62
|
+
├── Gemfile
|
63
|
+
├── Gemfile.lock
|
64
|
+
├── lib
|
65
|
+
├── log
|
66
|
+
├── Rakefile
|
67
|
+
├── spec
|
68
|
+
│ ├── spec_helper.rb
|
69
|
+
│ └── support
|
70
|
+
└── tmp
|
71
|
+
```
|
72
|
+
|
73
|
+
Let's look at it piece by piece:
|
74
|
+
|
75
|
+
* <b>app</b>: The directory with all the action. It's where you define:
|
76
|
+
* <b>models</b>: Your domain models or "nouns", which define and wrap the different kinds of data elements in your application. They are built using whatever framework you like (defaults to [Gorillib](http://github.com/infochimps-labs/gorillib))
|
77
|
+
* <b>processors</b>: Your fundamental operations or "verbs", which are passed records and parse, filter, augment, normalize, or split them.
|
78
|
+
* <b>flows</b>: Chain together processors into streaming flows for ingestion, real-time processing, or [complex event processing](http://en.wikipedia.org/wiki/Complex_event_processing) (CEP)
|
79
|
+
* <b>jobs</b>: Pair processors together to create batch jobs to run in Hadoop
|
80
|
+
* <b>config</b>: Where you place all application configuration for all environments
|
81
|
+
* <b>environment.rb</b>: Defines the runtime environment for all code, requiring and configuring all Wukong framework code. You shouldn't have to edit this file directly.
|
82
|
+
* <b>application.rb</b>: Require and configure libraries specific to your application. Choose a model framework, pick what application code gets loaded by default (vs. auto-loaded).
|
83
|
+
* <b>initializers</b>: Holds any files you need to load before <b>application.rb</b>. Useful for requiring and configuring external libraries.
|
84
|
+
* <b>settings.yml</b>: Defines application-wide settings.
|
85
|
+
* <b>environments</b>: Defines environment-specific settings in YAML files named after the environment. Overrides <b>config/settings.yml</b>.
|
86
|
+
* <b>data</b>: Holds sample data in flat files. You'll develop and test your application using this data.
|
87
|
+
* <b>Gemfile</b> and <b>Gemfile.lock</b>: Defines how libraries are resolved with [Bundler](http://gembundler.com/).
|
88
|
+
* <b>lib</b>: Holds any code you want to use in your application but that isn't "part of" your application (like vendored libraries, Rake tasks, &c.).
|
89
|
+
* <b>log</b>: A good place to stash logs.
|
90
|
+
* <b>Rakefile</b>: Defines [Rake](http://rake.rubyforge.org/) tasks for the development, test, and deploy of your application.
|
91
|
+
* <b>spec</b>: Holds all your [RSpec](http://rspec.info/) unit tests.
|
92
|
+
* <b>spec_helper.rb</b>: Loads libraries you'll use during testing, includes spec helper libraries from Wukong.
|
93
|
+
* <b>support</b>: Holds support code for your tests.
|
94
|
+
* <b>tmp</b>: A good place to stash temporary files.
|
data/Rakefile
ADDED
data/bin/wu-deploy
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Require Wukong so we can write processors & dataflows and use
|
2
|
+
# built-in widgets.
|
3
|
+
require 'wukong'
|
4
|
+
|
5
|
+
# Require just enough of wukong-hadoop to give us changes at the
|
6
|
+
# processor level. The rest is only required when running the
|
7
|
+
# 'wu-hadoop' program. Also grab configuration since we'll need it.
|
8
|
+
require 'wukong-hadoop/extensions'
|
9
|
+
require 'wukong-hadoop/configuration'
|
10
|
+
|
11
|
+
# Grab configuration from Wonderdog.
|
12
|
+
require 'wonderdog'
|
13
|
+
|
14
|
+
module Wukong
|
15
|
+
# Provides some helper methods for loading the assets within a
|
16
|
+
# deploy pack at runtime and for Wukong plugins to ask questions
|
17
|
+
# about the deploy pack they're running in.
|
18
|
+
module Deploy
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
# Now load the rest of the deploy pack.
|
24
|
+
require 'wukong-deploy/deploy_pack'
|
25
|
+
require 'wukong-deploy/configuration'
|
26
|
+
require 'wukong-deploy/driver'
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Pass in +settings+ that need to be modified for deploy-specific
|
5
|
+
# settings *before* they are resolved.
|
6
|
+
#
|
7
|
+
# @param [Configliere::Param] settings
|
8
|
+
# @return [Configliere::Param] the newly configured settings
|
9
|
+
def self.configure settings
|
10
|
+
settings.define(:environment, :description => "The environment to run in", :default => 'development', :flag => 'e', :env_var => 'ICS_PLATFORM_ENV')
|
11
|
+
|
12
|
+
case executable
|
13
|
+
when 'wu-hadoop'
|
14
|
+
Wukong::Elasticsearch.configure(settings) if executable == 'wu-hadoop'
|
15
|
+
end
|
16
|
+
settings
|
17
|
+
end
|
18
|
+
|
19
|
+
# All configuration for Wukong::Deploy lives in this object.
|
20
|
+
Configuration = configure(Configliere::Param.new) unless defined?(Configuration)
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
|
4
|
+
# Boot the deploy pack, reading all available +settings+ and
|
5
|
+
# rooting it at the given +path+.
|
6
|
+
#
|
7
|
+
# @param [Configliere::Param] settings
|
8
|
+
# @param [String] path
|
9
|
+
def self.boot! settings, path
|
10
|
+
@pre_deploy_settings = settings.dup
|
11
|
+
@settings = settings
|
12
|
+
@root = Pathname.new(path)
|
13
|
+
read_common_settings
|
14
|
+
read_environment_settings
|
15
|
+
read_remote_settings
|
16
|
+
end
|
17
|
+
|
18
|
+
# Return the root directory of this deploy pack.
|
19
|
+
#
|
20
|
+
# @return [Pathname]
|
21
|
+
def self.root
|
22
|
+
@root
|
23
|
+
end
|
24
|
+
|
25
|
+
# Recursively require each Ruby file within +dir+.
|
26
|
+
#
|
27
|
+
# @example Requiring all .rb files anywhere within /lib/my_lib
|
28
|
+
#
|
29
|
+
# Wukong::Deploy.require_recursive("lib/my_lib")
|
30
|
+
#
|
31
|
+
# @param [String] glob
|
32
|
+
def self.require_recursive glob
|
33
|
+
Dir[root.join("#{glob}/**/*.rb")].each { |path| require(path) }
|
34
|
+
end
|
35
|
+
|
36
|
+
# Return the name of the executable program currently running.
|
37
|
+
#
|
38
|
+
# @return [String]
|
39
|
+
def self.executable
|
40
|
+
File.basename($0)
|
41
|
+
end
|
42
|
+
|
43
|
+
# Return the current environment the deploy pack is in.
|
44
|
+
#
|
45
|
+
# @return [String]
|
46
|
+
def self.environment
|
47
|
+
settings[:environment]
|
48
|
+
end
|
49
|
+
|
50
|
+
# Return the deploy pack's own settings.
|
51
|
+
#
|
52
|
+
# @return [Configliere::Param]
|
53
|
+
def self.settings
|
54
|
+
@settings
|
55
|
+
end
|
56
|
+
|
57
|
+
# Return the settings the deploy pack had *before* it booted
|
58
|
+
# itself -- these are used to pass to other tools when invoking
|
59
|
+
# them, since they will read the deploy pack settings themselves
|
60
|
+
# anyway.
|
61
|
+
#
|
62
|
+
# @return [Configliere::Param]
|
63
|
+
def self.pre_deploy_settings
|
64
|
+
@pre_deploy_settings
|
65
|
+
end
|
66
|
+
|
67
|
+
# The root directory of this deploy pack.
|
68
|
+
#
|
69
|
+
# @return [Pathname]
|
70
|
+
def self.root
|
71
|
+
@root
|
72
|
+
end
|
73
|
+
|
74
|
+
# The directory this deploy pack keeps local, sample data in.
|
75
|
+
#
|
76
|
+
# @return [Pathname]
|
77
|
+
def self.data_dir
|
78
|
+
root.join('data')
|
79
|
+
end
|
80
|
+
|
81
|
+
# The directory this deploy pack uses for configuration files.
|
82
|
+
#
|
83
|
+
# @return [Pathname]
|
84
|
+
def self.config_dir
|
85
|
+
root.join('config')
|
86
|
+
end
|
87
|
+
|
88
|
+
# The directory this deploy pack uses for temporary files.
|
89
|
+
#
|
90
|
+
# @return [Pathname]
|
91
|
+
def self.tmp_dir
|
92
|
+
root.join('tmp')
|
93
|
+
end
|
94
|
+
|
95
|
+
# The directory this deploy pack uses for logs.
|
96
|
+
#
|
97
|
+
# @return [Pathname]
|
98
|
+
def self.log_dir
|
99
|
+
root.join('log')
|
100
|
+
end
|
101
|
+
|
102
|
+
# The directory this deploy pack puts all its application code in.
|
103
|
+
#
|
104
|
+
# @return [Pathname]
|
105
|
+
def self.app_dir
|
106
|
+
root.join('app')
|
107
|
+
end
|
108
|
+
|
109
|
+
# The logger for this deploy pack.
|
110
|
+
#
|
111
|
+
# @return [Logger]
|
112
|
+
def self.logger
|
113
|
+
return @logger if @logger
|
114
|
+
# FIXME -- want to use the Wukong logger here...
|
115
|
+
require 'logger'
|
116
|
+
@logger ||= defined?(Log) ? Log : Logger.new
|
117
|
+
end
|
118
|
+
|
119
|
+
private
|
120
|
+
|
121
|
+
# Read settings common across all environments from
|
122
|
+
# config/settings.yml.
|
123
|
+
def self.read_common_settings
|
124
|
+
read_settings_from_file(settings, config_dir.join("settings.yml"))
|
125
|
+
end
|
126
|
+
|
127
|
+
# Read settings unique to this deploy pack's current environment
|
128
|
+
# from config/environments/ENVIRONMENT.yml.
|
129
|
+
def self.read_environment_settings
|
130
|
+
read_settings_from_file(settings, config_dir.join("environments", "#{environment}.yml"))
|
131
|
+
end
|
132
|
+
|
133
|
+
# Update +settings+ with the configuration at the given +path+.
|
134
|
+
#
|
135
|
+
# @param [Configliere::Param] settings
|
136
|
+
# @param [String, Pathname] path
|
137
|
+
def self.read_settings_from_file settings, path
|
138
|
+
if File.exist?(path) && File.readable?(path) && File.file?(path)
|
139
|
+
settings.read(path)
|
140
|
+
else
|
141
|
+
logger.warn("Cannot read settings file at #{path}.")
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
# Read remote settings.
|
146
|
+
#
|
147
|
+
# FIXME -- not implemented yet.
|
148
|
+
def self.read_remote_settings
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Wukong
|
4
|
+
module Deploy
|
5
|
+
class Driver
|
6
|
+
|
7
|
+
attr_accessor :settings
|
8
|
+
attr_accessor :args
|
9
|
+
|
10
|
+
def self.run(settings, *args)
|
11
|
+
begin
|
12
|
+
new(settings,*args).run!
|
13
|
+
rescue Wukong::Error => e
|
14
|
+
$stderr.puts(e.message)
|
15
|
+
exit(1)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def run!
|
20
|
+
case args.first
|
21
|
+
when 'new'
|
22
|
+
require_relative('templater')
|
23
|
+
raise Error.new("Must provide a path to the root of the deploy pack you want to create") if args[1].nil? || args[1].blank?
|
24
|
+
Templater.new(File.expand_path(args[1], Dir.pwd)).create
|
25
|
+
when 'console'
|
26
|
+
require_relative('console')
|
27
|
+
Wukong::Deploy::Console.new.run!
|
28
|
+
else
|
29
|
+
settings.dump_help
|
30
|
+
exit(2)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def initialize settings, *args
|
35
|
+
self.settings = settings
|
36
|
+
self.args = args
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module Wukong
|
2
|
+
module Deploy
|
3
|
+
class Repo
|
4
|
+
|
5
|
+
TOP_LEVEL_DIR_NAMES = %w[app config data lib log script spec tmp]
|
6
|
+
|
7
|
+
attr_reader :root
|
8
|
+
|
9
|
+
def initialize root
|
10
|
+
@root = Pathname.new(root)
|
11
|
+
end
|
12
|
+
|
13
|
+
TOP_LEVEL_DIR_NAMES.each do |name|
|
14
|
+
define_method("#{name}_dir") do |*args|
|
15
|
+
root.join(name, *args)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def dirs_to_create
|
20
|
+
[
|
21
|
+
root,
|
22
|
+
app_dir('models'),
|
23
|
+
app_dir('processors'),
|
24
|
+
app_dir('flows'),
|
25
|
+
app_dir('jobs'),
|
26
|
+
|
27
|
+
config_dir('environments'),
|
28
|
+
config_dir('initializers'),
|
29
|
+
|
30
|
+
data_dir,
|
31
|
+
lib_dir,
|
32
|
+
log_dir,
|
33
|
+
script_dir,
|
34
|
+
|
35
|
+
spec_dir('models'),
|
36
|
+
spec_dir('processors'),
|
37
|
+
spec_dir('flows'),
|
38
|
+
spec_dir('jobs'),
|
39
|
+
spec_dir('support'),
|
40
|
+
|
41
|
+
tmp_dir
|
42
|
+
]
|
43
|
+
end
|
44
|
+
|
45
|
+
def files_to_template
|
46
|
+
%w[
|
47
|
+
config/boot.rb
|
48
|
+
config/application.rb
|
49
|
+
config/environment.rb
|
50
|
+
config/settings.yml
|
51
|
+
config/environments/development.yml
|
52
|
+
config/environments/test.yml
|
53
|
+
config/environments/production.yml
|
54
|
+
|
55
|
+
Gemfile
|
56
|
+
Rakefile
|
57
|
+
README.md
|
58
|
+
|
59
|
+
spec/spec_helper.rb
|
60
|
+
]
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
namespace 'bundler' do
|
4
|
+
desc "Remove local bundle repository"
|
5
|
+
task :clean do
|
6
|
+
Wukong::Deploy.logger.info "Removing local bundle at /.bundle and /bundle"
|
7
|
+
sh "rm -rf .bundle bundle"
|
8
|
+
Wukong::Deploy.logger.info "Make sure to run 'bundle install --standalone' to re-create the bundle"
|
9
|
+
end
|
10
|
+
|
11
|
+
end
|
12
|
+
|
13
|
+
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'erubis'
|
4
|
+
|
5
|
+
require_relative('repo')
|
6
|
+
|
7
|
+
module Wukong
|
8
|
+
module Deploy
|
9
|
+
class Templater
|
10
|
+
|
11
|
+
attr_accessor :repo
|
12
|
+
attr_accessor :options
|
13
|
+
|
14
|
+
include FileUtils::Verbose
|
15
|
+
|
16
|
+
def initialize root, options={}
|
17
|
+
self.repo = Repo.new(root)
|
18
|
+
self.options = options
|
19
|
+
end
|
20
|
+
|
21
|
+
def create
|
22
|
+
create_dirs
|
23
|
+
create_templates
|
24
|
+
create_gitkeeps
|
25
|
+
create_gitignore
|
26
|
+
end
|
27
|
+
|
28
|
+
def create_dirs
|
29
|
+
repo.dirs_to_create.each { |dir| mkdir_p(dir) }
|
30
|
+
end
|
31
|
+
|
32
|
+
def create_templates
|
33
|
+
repo.files_to_template.each do |repo_path|
|
34
|
+
create_template(templates_dir.join("#{repo_path}.erb"), repo.root.join(repo_path))
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def create_template input_path, output_path, binding={}
|
39
|
+
input = File.read(input_path)
|
40
|
+
erb = Erubis::Eruby.new(input)
|
41
|
+
output = erb.result(binding)
|
42
|
+
action = File.exist?(output_path) ? 'modify' : 'create'
|
43
|
+
puts "#{action} #{output_path}"
|
44
|
+
File.open(output_path, 'w') { |f| f.puts(output) }
|
45
|
+
end
|
46
|
+
|
47
|
+
def create_gitkeeps
|
48
|
+
repo.dirs_to_create.each do |dir|
|
49
|
+
if Dir[File.join(dir, '*')].empty?
|
50
|
+
touch(File.join(dir, '.gitkeep'))
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def create_gitignore
|
56
|
+
create_template(templates_dir.join('gitignore'), repo.root.join('.gitignore'))
|
57
|
+
end
|
58
|
+
|
59
|
+
def templates_dir
|
60
|
+
@templates_dir ||= Pathname.new(File.expand_path('../../../templates', __FILE__))
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Use the public [RubyGems server](http://rubygems.org/)
|
2
|
+
source 'https://rubygems.org'
|
3
|
+
|
4
|
+
# = Application
|
5
|
+
#
|
6
|
+
# The following dependencies are for your application.
|
7
|
+
#
|
8
|
+
# Does one of your processors require the UserAgent library because
|
9
|
+
# it's parsing HTTP User-Agent strings? Just insert
|
10
|
+
#
|
11
|
+
# gem 'useragent'
|
12
|
+
#
|
13
|
+
# here and then remember to run
|
14
|
+
#
|
15
|
+
# $ bundle install --standalone
|
16
|
+
#
|
17
|
+
# Here's a link to the [Bundler documentation](http://gembundler.com/)
|
18
|
+
# for your convenience.
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
#
|
26
|
+
# = Framework
|
27
|
+
#
|
28
|
+
# The following libraries are part of the deploy pack framework. You
|
29
|
+
# shouldn't need to change these.
|
30
|
+
#
|
31
|
+
|
32
|
+
gem 'wukong-deploy', :git => 'https://github.com/infochimps-labs/wukong-deploy'
|
33
|
+
gem 'wukong', :git => 'https://github.com/infochimps-labs/wukong', :branch => '3.0.0'
|
34
|
+
gem 'wukong-hadoop', :git => 'https://github.com/infochimps-labs/wukong-hadoop'
|
35
|
+
gem 'wonderdog', :git => 'https://github.com/infochimps-labs/wonderdog'
|
36
|
+
gem 'vayacondios-client', :git => 'https://github.com/infochimps-labs/vayacondios'
|
37
|
+
|
38
|
+
group :development do
|
39
|
+
gem 'rspec'
|
40
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
Welcome to your new deploy pack.
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# = config/application.rb
|
2
|
+
#
|
3
|
+
# Loads the deploy pack application and all supporting libraries
|
4
|
+
# including Wukong and any of its plugins.
|
5
|
+
#
|
6
|
+
# The load order is as follows:
|
7
|
+
#
|
8
|
+
# * [Wukong-Deploy](http://github.com/infochimps-labs/wukong-deploy) which loads all Wukong and Wukong plugins.
|
9
|
+
# * Any code you put in the `config/initializers` directory. This is a good place to configure additional libraries to support your application.
|
10
|
+
# * Your application code itself, in the following order
|
11
|
+
# * models
|
12
|
+
# * processors
|
13
|
+
# * flows
|
14
|
+
# * jobs
|
15
|
+
# * anything else
|
16
|
+
#
|
17
|
+
# You can put code you want available but you don't intend to use in
|
18
|
+
# your application inside the `lib` directory.
|
19
|
+
|
20
|
+
#
|
21
|
+
# == Deploy Pack Framework
|
22
|
+
#
|
23
|
+
|
24
|
+
# Boot up first
|
25
|
+
require_relative("boot")
|
26
|
+
|
27
|
+
# Require the Wukong-Deploy plugin which will itself require and
|
28
|
+
# configure core Wukong and other plugins.
|
29
|
+
require 'wukong-deploy'
|
30
|
+
|
31
|
+
# Load initializers containing any requires or configuration code
|
32
|
+
# needed by the app.
|
33
|
+
Dir[File.dirname(__FILE__) + '/initializers/**/*.rb'].each { |path| require(path) }
|
34
|
+
|
35
|
+
#
|
36
|
+
# == Model Support
|
37
|
+
#
|
38
|
+
# We're going to use Gorillib, Infochimps' own library of useful
|
39
|
+
# methods and Ruby hacks.
|
40
|
+
require 'gorillib'
|
41
|
+
require 'gorillib/model/serialization'
|
42
|
+
require 'gorillib/object/blank'
|
43
|
+
|
44
|
+
#
|
45
|
+
# But you could use a more Rails-y support stack
|
46
|
+
#
|
47
|
+
# require 'active_support'
|
48
|
+
|
49
|
+
#
|
50
|
+
# Or DataMapper
|
51
|
+
#
|
52
|
+
# require 'dm-core'
|
53
|
+
|
54
|
+
#
|
55
|
+
# == Application Structure
|
56
|
+
#
|
57
|
+
#
|
58
|
+
# By default, we try not to be clever and just load *everything* in
|
59
|
+
# app/. If you have a lot of application code, this may be slow on
|
60
|
+
# startup and you may want to only load common classes or define
|
61
|
+
# autoloads or whatever you want to do.
|
62
|
+
Dir[File.expand_path('../../app/models/**/*.rb', __FILE__)].each { |path| require(path) }
|
63
|
+
Dir[File.expand_path('../../app/processors/**/*.rb', __FILE__)].each { |path| require(path) }
|
64
|
+
Dir[File.expand_path('../../app/flows/**/*.rb', __FILE__)].each { |path| require(path) }
|
65
|
+
Dir[File.expand_path('../../app/jobs/**/*.rb', __FILE__)].each { |path| require(path) }
|
66
|
+
Dir[File.expand_path('../../app/**/*.rb', __FILE__)].each { |path| require(path) }
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# = config/boot.rb
|
2
|
+
#
|
3
|
+
# Loads the Ruby language framework and activates Bundler.
|
4
|
+
#
|
5
|
+
# We use a standalone bundle installation because it makes it easier
|
6
|
+
# to have non-Ruby code in underlying frameworks interact with the
|
7
|
+
# deploy pack.
|
8
|
+
#
|
9
|
+
# Make sure you install the bundle locally with the following command:
|
10
|
+
#
|
11
|
+
# @example Install a standalone bundle of gems
|
12
|
+
#
|
13
|
+
# $ bundle install --standalone
|
14
|
+
|
15
|
+
setup_path = File.expand_path("../../bundle/bundler/setup.rb", __FILE__)
|
16
|
+
begin
|
17
|
+
require setup_path
|
18
|
+
rescue LoadError => e
|
19
|
+
$stderr.puts("Failed to require \"#{setup_path}\". Try running\n\n bundle install --standalone\n\n")
|
20
|
+
exit(1)
|
21
|
+
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
# = config/environment.rb
|
2
|
+
#
|
3
|
+
# Loads all the Ruby language and framework code that defines the
|
4
|
+
# runtime environment of your application.
|
5
|
+
|
6
|
+
require_relative("application")
|
7
|
+
|
8
|
+
Wukong::Deploy.boot!(Wukong::Deploy::Configuration, File.expand_path("../../", __FILE__))
|
@@ -0,0 +1,54 @@
|
|
1
|
+
---
|
2
|
+
|
3
|
+
# = config/settings.yml
|
4
|
+
#
|
5
|
+
# Any settings defined in this file will be merged into a global
|
6
|
+
# Settings object within any of the wu-tools which run inside this
|
7
|
+
# deploy pack.
|
8
|
+
#
|
9
|
+
# Settings will additionally be loaded from a file named after the
|
10
|
+
# ICS_PLATFORM_ENV environment variable and will override those
|
11
|
+
# declared here. The 'development' environment is the default
|
12
|
+
# environment loaded.
|
13
|
+
#
|
14
|
+
# Any settings specified on the command line when invoking wu-tools
|
15
|
+
# will take precedence over settings within any of these files.
|
16
|
+
#
|
17
|
+
|
18
|
+
|
19
|
+
#
|
20
|
+
# == Notifier
|
21
|
+
#
|
22
|
+
# Notifiers are used within a Processor for out of band event
|
23
|
+
# processing.
|
24
|
+
notifier:
|
25
|
+
|
26
|
+
# The type of notifier to use is better set on a per-environment
|
27
|
+
# basis. See the corresponding YAML files in config/environments.
|
28
|
+
# type: log
|
29
|
+
|
30
|
+
#
|
31
|
+
# === Log notifier
|
32
|
+
#
|
33
|
+
# The log notifier will print each event to the log.
|
34
|
+
|
35
|
+
# A path to a log file.
|
36
|
+
# log: log/wu.log
|
37
|
+
|
38
|
+
# The priority level of the log
|
39
|
+
# level: info
|
40
|
+
|
41
|
+
#
|
42
|
+
# === HTTP notifier
|
43
|
+
#
|
44
|
+
# The HTTP notifier will send events over HTTP to an instance of the
|
45
|
+
# Infochimps Platform API
|
46
|
+
|
47
|
+
# The host to send each event to. Your host is typically determined
|
48
|
+
# by the name of your platform account.
|
49
|
+
# host: api.INSERT_ACCOUNT_NAME_HERE.chimpy.us
|
50
|
+
|
51
|
+
# The port to send each event to.
|
52
|
+
# port: 9000
|
53
|
+
|
54
|
+
# You can put processor-specific settings within this file as well.
|
data/templates/gitignore
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# version control
|
2
|
+
.svn
|
3
|
+
.hg
|
4
|
+
.bzr
|
5
|
+
|
6
|
+
# editors
|
7
|
+
\#*
|
8
|
+
.#*
|
9
|
+
\.#*
|
10
|
+
*~
|
11
|
+
*.tmproj
|
12
|
+
*._flymake.*
|
13
|
+
*.project
|
14
|
+
|
15
|
+
# os x
|
16
|
+
.DS_Store
|
17
|
+
Icon?
|
18
|
+
|
19
|
+
# backup
|
20
|
+
.bak
|
21
|
+
|
22
|
+
# tools
|
23
|
+
TAGS*
|
24
|
+
nohup.out
|
25
|
+
a.out
|
26
|
+
*.pyc
|
27
|
+
*.so
|
28
|
+
*.stackdump
|
29
|
+
.yardoc
|
30
|
+
|
31
|
+
# commonly ignored files
|
32
|
+
/**/*DO_NOT_VERSION*
|
33
|
+
/**/log/*
|
34
|
+
/**/tmp/*
|
35
|
+
/doc/*
|
36
|
+
/pkg/*
|
37
|
+
|
38
|
+
.bundle
|
39
|
+
bundle/*
|
@@ -0,0 +1,13 @@
|
|
1
|
+
ENV["ICS_PLATFORM_ENV"] ||= "test"
|
2
|
+
|
3
|
+
require_relative('../config/environment')
|
4
|
+
|
5
|
+
# Require spec helpers from Wukong
|
6
|
+
require 'wukong/spec_helpers'
|
7
|
+
|
8
|
+
Dir[File.expand_path('../support/**/*.rb', __FILE__)].each { |path| require path }
|
9
|
+
|
10
|
+
RSpec.configure do |config|
|
11
|
+
config.mock_with :rspec
|
12
|
+
include Wukong::SpecHelpers
|
13
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/wukong-deploy/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |gem|
|
5
|
+
gem.name = 'wukong-deploy'
|
6
|
+
gem.homepage = 'https://github.com/infochimps-labs/wukong-deploy'
|
7
|
+
gem.licenses = ["Apache 2.0"]
|
8
|
+
gem.email = 'coders@infochimps.com'
|
9
|
+
gem.authors = ['Infochimps', 'Dhruv Bansal', 'Travis Dempsey']
|
10
|
+
gem.version = Wukong::Deploy::VERSION
|
11
|
+
|
12
|
+
gem.summary = 'Defines the deploy pack framework used by the Infochimps Platform'
|
13
|
+
gem.description = <<-EOF
|
14
|
+
The Infochimps Platform is an end-to-end, managed solution for
|
15
|
+
building Big Data applications. It integrates best-of-breed
|
16
|
+
technologies like Hadoop, Storm, Kafka, MongoDB, ElasticSearch,
|
17
|
+
HBase, &c. and provides simple interfaces for accessing these
|
18
|
+
powerful tools.
|
19
|
+
|
20
|
+
Computation, analytics, scripting, &c. are all handled by Wukong
|
21
|
+
within the platform. Wukong is an abstract framework for defining
|
22
|
+
computations on data. Wukong processors and flows can run in many
|
23
|
+
different execution contexts including:
|
24
|
+
|
25
|
+
* locally on the command-line for testing or development purposes
|
26
|
+
* as a Hadoop mapper or reducer for batch analytics or ETL
|
27
|
+
* within Storm as part of a real-time data flow
|
28
|
+
|
29
|
+
The Infochimps Platform uses the concept of a deploy pack for
|
30
|
+
developers to develop all their processors, flows, and jobs within.
|
31
|
+
EOF
|
32
|
+
|
33
|
+
gem.files = `git ls-files`.split("\n")
|
34
|
+
gem.executables = ['wu-deploy']
|
35
|
+
gem.test_files = gem.files.grep(/^spec/)
|
36
|
+
gem.require_paths = ['lib']
|
37
|
+
|
38
|
+
gem.add_dependency('wukong', '3.0.0.pre2')
|
39
|
+
gem.add_dependency('wukong-hadoop')
|
40
|
+
gem.add_dependency('wonderdog')
|
41
|
+
gem.add_dependency('erubis')
|
42
|
+
|
43
|
+
gem.add_dependency('rake', '~> 0.9')
|
44
|
+
gem.add_development_dependency 'rspec', '~> 2'
|
45
|
+
|
46
|
+
end
|
47
|
+
|
metadata
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wukong-deploy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Infochimps
|
9
|
+
- Dhruv Bansal
|
10
|
+
- Travis Dempsey
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2012-12-01 00:00:00.000000000 Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: wukong
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - '='
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 3.0.0.pre2
|
24
|
+
type: :runtime
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
none: false
|
28
|
+
requirements:
|
29
|
+
- - '='
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: 3.0.0.pre2
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: wukong-hadoop
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
none: false
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
type: :runtime
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: wonderdog
|
50
|
+
requirement: !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ! '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
type: :runtime
|
57
|
+
prerelease: false
|
58
|
+
version_requirements: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
- !ruby/object:Gem::Dependency
|
65
|
+
name: erubis
|
66
|
+
requirement: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ! '>='
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
type: :runtime
|
73
|
+
prerelease: false
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: rake
|
82
|
+
requirement: !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ~>
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0.9'
|
88
|
+
type: :runtime
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ~>
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0.9'
|
96
|
+
- !ruby/object:Gem::Dependency
|
97
|
+
name: rspec
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ~>
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '2'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ~>
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '2'
|
112
|
+
description: ! " The Infochimps Platform is an end-to-end, managed solution for\n
|
113
|
+
\ building Big Data applications. It integrates best-of-breed\n technologies like
|
114
|
+
Hadoop, Storm, Kafka, MongoDB, ElasticSearch,\n HBase, &c. and provides simple
|
115
|
+
interfaces for accessing these\n powerful tools.\n\n Computation, analytics, scripting,
|
116
|
+
&c. are all handled by Wukong\n within the platform. Wukong is an abstract framework
|
117
|
+
for defining\n computations on data. Wukong processors and flows can run in many\n
|
118
|
+
\ different execution contexts including:\n\n * locally on the command-line for
|
119
|
+
testing or development purposes\n * as a Hadoop mapper or reducer for batch analytics
|
120
|
+
or ETL\n * within Storm as part of a real-time data flow\n\n The Infochimps
|
121
|
+
Platform uses the concept of a deploy pack for\n developers to develop all their
|
122
|
+
processors, flows, and jobs within.\n"
|
123
|
+
email: coders@infochimps.com
|
124
|
+
executables:
|
125
|
+
- wu-deploy
|
126
|
+
extensions: []
|
127
|
+
extra_rdoc_files: []
|
128
|
+
files:
|
129
|
+
- .gitignore
|
130
|
+
- .rspec
|
131
|
+
- Gemfile
|
132
|
+
- README.md
|
133
|
+
- Rakefile
|
134
|
+
- bin/wu-deploy
|
135
|
+
- lib/wukong-deploy.rb
|
136
|
+
- lib/wukong-deploy/configuration.rb
|
137
|
+
- lib/wukong-deploy/console.rb
|
138
|
+
- lib/wukong-deploy/deploy_pack.rb
|
139
|
+
- lib/wukong-deploy/driver.rb
|
140
|
+
- lib/wukong-deploy/repo.rb
|
141
|
+
- lib/wukong-deploy/tasks.rb
|
142
|
+
- lib/wukong-deploy/templater.rb
|
143
|
+
- lib/wukong-deploy/version.rb
|
144
|
+
- templates/Gemfile.erb
|
145
|
+
- templates/README.md.erb
|
146
|
+
- templates/Rakefile.erb
|
147
|
+
- templates/config/application.rb.erb
|
148
|
+
- templates/config/boot.rb.erb
|
149
|
+
- templates/config/environment.rb.erb
|
150
|
+
- templates/config/environments/development.yml.erb
|
151
|
+
- templates/config/environments/production.yml.erb
|
152
|
+
- templates/config/environments/test.yml.erb
|
153
|
+
- templates/config/settings.yml.erb
|
154
|
+
- templates/gitignore
|
155
|
+
- templates/spec/spec_helper.rb.erb
|
156
|
+
- wukong-deploy.gemspec
|
157
|
+
homepage: https://github.com/infochimps-labs/wukong-deploy
|
158
|
+
licenses:
|
159
|
+
- Apache 2.0
|
160
|
+
post_install_message:
|
161
|
+
rdoc_options: []
|
162
|
+
require_paths:
|
163
|
+
- lib
|
164
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
165
|
+
none: false
|
166
|
+
requirements:
|
167
|
+
- - ! '>='
|
168
|
+
- !ruby/object:Gem::Version
|
169
|
+
version: '0'
|
170
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
|
+
none: false
|
172
|
+
requirements:
|
173
|
+
- - ! '>='
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0'
|
176
|
+
requirements: []
|
177
|
+
rubyforge_project:
|
178
|
+
rubygems_version: 1.8.23
|
179
|
+
signing_key:
|
180
|
+
specification_version: 3
|
181
|
+
summary: Defines the deploy pack framework used by the Infochimps Platform
|
182
|
+
test_files: []
|
183
|
+
has_rdoc:
|