wukong-deploy 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +59 -0
- data/.rspec +2 -0
- data/Gemfile +3 -0
- data/README.md +94 -0
- data/Rakefile +10 -0
- data/bin/wu-deploy +7 -0
- data/lib/wukong-deploy.rb +26 -0
- data/lib/wukong-deploy/configuration.rb +23 -0
- data/lib/wukong-deploy/console.rb +15 -0
- data/lib/wukong-deploy/deploy_pack.rb +151 -0
- data/lib/wukong-deploy/driver.rb +41 -0
- data/lib/wukong-deploy/repo.rb +66 -0
- data/lib/wukong-deploy/tasks.rb +13 -0
- data/lib/wukong-deploy/templater.rb +65 -0
- data/lib/wukong-deploy/version.rb +7 -0
- data/templates/Gemfile.erb +40 -0
- data/templates/README.md.erb +1 -0
- data/templates/Rakefile.erb +10 -0
- data/templates/config/application.rb.erb +66 -0
- data/templates/config/boot.rb.erb +21 -0
- data/templates/config/environment.rb.erb +8 -0
- data/templates/config/environments/development.yml.erb +8 -0
- data/templates/config/environments/production.yml.erb +10 -0
- data/templates/config/environments/test.yml.erb +8 -0
- data/templates/config/settings.yml.erb +54 -0
- data/templates/gitignore +39 -0
- data/templates/spec/spec_helper.rb.erb +13 -0
- data/wukong-deploy.gemspec +47 -0
- metadata +183 -0
data/.gitignore
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
## OS
|
2
|
+
.DS_Store
|
3
|
+
Icon
|
4
|
+
nohup.out
|
5
|
+
.bak
|
6
|
+
|
7
|
+
*.pem
|
8
|
+
|
9
|
+
## EDITORS
|
10
|
+
\#*
|
11
|
+
.\#*
|
12
|
+
\#*\#
|
13
|
+
*~
|
14
|
+
*.swp
|
15
|
+
REVISION
|
16
|
+
TAGS*
|
17
|
+
tmtags
|
18
|
+
*_flymake.*
|
19
|
+
*_flymake
|
20
|
+
*.tmproj
|
21
|
+
.project
|
22
|
+
.settings
|
23
|
+
|
24
|
+
## COMPILED
|
25
|
+
a.out
|
26
|
+
*.o
|
27
|
+
*.pyc
|
28
|
+
*.so
|
29
|
+
|
30
|
+
## OTHER SCM
|
31
|
+
.bzr
|
32
|
+
.hg
|
33
|
+
.svn
|
34
|
+
|
35
|
+
## PROJECT::GENERAL
|
36
|
+
|
37
|
+
log/*
|
38
|
+
tmp/*
|
39
|
+
pkg/*
|
40
|
+
|
41
|
+
coverage
|
42
|
+
rdoc
|
43
|
+
doc
|
44
|
+
pkg
|
45
|
+
.rake_test_cache
|
46
|
+
.bundle
|
47
|
+
.yardoc
|
48
|
+
|
49
|
+
.vendor
|
50
|
+
|
51
|
+
## PROJECT::SPECIFIC
|
52
|
+
|
53
|
+
old/*
|
54
|
+
docpages
|
55
|
+
away
|
56
|
+
|
57
|
+
.rbx
|
58
|
+
Gemfile.lock
|
59
|
+
Backup*of*.numbers
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# Wukong Deploy Pack
|
2
|
+
|
3
|
+
The [Infochimps Platform](http://www.infochimps.com) is an end-to-end,
|
4
|
+
managed solution for building Big Data applications. It integrates
|
5
|
+
best-of-breed technologies like [Hadoop](http://hadoop.apache.org/),
|
6
|
+
[Storm](https://github.com/nathanmarz/storm),
|
7
|
+
[Kafka](http://incubator.apache.org/kafka/),
|
8
|
+
[MongoDB](http://www.mongodb.org/),
|
9
|
+
[ElasticSearch](http://www.elasticsearch.org/),
|
10
|
+
[HBase](http://hbase.apache.org/), &c. and provides simple interfaces
|
11
|
+
for accessing these powerful tools.
|
12
|
+
|
13
|
+
Computation, analytics, scripting, &c. are all handled by
|
14
|
+
[Wukong](http://github.com/infochimps-labs/wukong) within the
|
15
|
+
platform. Wukong is an abstract framework for defining computations
|
16
|
+
on data. Wukong processors and flows can run in many different
|
17
|
+
execution contexts including:
|
18
|
+
|
19
|
+
* locally on the command-line for testing or development purposes
|
20
|
+
* as a Hadoop mapper or reducer for batch analytics or ETL
|
21
|
+
* within Storm as part of a real-time data flow
|
22
|
+
|
23
|
+
The Infochimps Platform uses the concept of a deploy pack for
|
24
|
+
developers to develop all their processors, flows, and jobs within.
|
25
|
+
The deploy pack can be thought of as a container for all the necessary
|
26
|
+
Wukong code and plugins useful in the context of an Infochimps
|
27
|
+
Platform application. It includes the following libraries:
|
28
|
+
|
29
|
+
* <a href="http://github.com/infochimps-labs/wukong">wukong</a>: The core framework for writing processors and chaining them together.
|
30
|
+
* <a href="http://github.com/infochimps-labs/wukong-hadoop">wukong-hadoop</a>: Run Wukong processors as mappers and reducers within the Hadoop framework. Model Hadoop jobs locally before you run them.
|
31
|
+
* <a href="http://github.com/infochimps-labs/wonderdog">wonderdog</a>: Connect Wukong processors running within Hadoop to Elasticsearch as either a source or sink for data.
|
32
|
+
|
33
|
+
## Installation
|
34
|
+
|
35
|
+
The deploy pack is installed as a RubyGem:
|
36
|
+
|
37
|
+
```
|
38
|
+
$ sudo gem install wukong-deploy
|
39
|
+
```
|
40
|
+
|
41
|
+
## File Structure
|
42
|
+
|
43
|
+
A deploy pack is a repository with the following
|
44
|
+
[Rails](http://rubyonrails.org/)-like file structure:
|
45
|
+
|
46
|
+
```
|
47
|
+
├── app
|
48
|
+
│ ├── models
|
49
|
+
│ ├── processors
|
50
|
+
│ ├── flows
|
51
|
+
│ └── jobs
|
52
|
+
├── config
|
53
|
+
│ ├── environment.rb
|
54
|
+
│ ├── application.rb
|
55
|
+
│ ├── initializers
|
56
|
+
│ ├── settings.yml
|
57
|
+
│ └── environments
|
58
|
+
│ ├── development.yml
|
59
|
+
│ ├── production.yml
|
60
|
+
│ └── test.yml
|
61
|
+
├── data
|
62
|
+
├── Gemfile
|
63
|
+
├── Gemfile.lock
|
64
|
+
├── lib
|
65
|
+
├── log
|
66
|
+
├── Rakefile
|
67
|
+
├── spec
|
68
|
+
│ ├── spec_helper.rb
|
69
|
+
│ └── support
|
70
|
+
└── tmp
|
71
|
+
```
|
72
|
+
|
73
|
+
Let's look at it piece by piece:
|
74
|
+
|
75
|
+
* <b>app</b>: The directory with all the action. It's where you define:
|
76
|
+
* <b>models</b>: Your domain models or "nouns", which define and wrap the different kinds of data elements in your application. They are built using whatever framework you like (defaults to [Gorillib](http://github.com/infochimps-labs/gorillib))
|
77
|
+
* <b>processors</b>: Your fundamental operations or "verbs", which are passed records and parse, filter, augment, normalize, or split them.
|
78
|
+
* <b>flows</b>: Chain together processors into streaming flows for ingestion, real-time processing, or [complex event processing](http://en.wikipedia.org/wiki/Complex_event_processing) (CEP)
|
79
|
+
* <b>jobs</b>: Pair processors together to create batch jobs to run in Hadoop
|
80
|
+
* <b>config</b>: Where you place all application configuration for all environments
|
81
|
+
* <b>environment.rb</b>: Defines the runtime environment for all code, requiring and configuring all Wukong framework code. You shouldn't have to edit this file directly.
|
82
|
+
* <b>application.rb</b>: Require and configure libraries specific to your application. Choose a model framework, pick what application code gets loaded by default (vs. auto-loaded).
|
83
|
+
* <b>initializers</b>: Holds any files you need to load before <b>application.rb</b>. Useful for requiring and configuring external libraries.
|
84
|
+
* <b>settings.yml</b>: Defines application-wide settings.
|
85
|
+
* <b>environments</b>: Defines environment-specific settings in YAML files named after the environment. Overrides <b>config/settings.yml</b>.
|
86
|
+
* <b>data</b>: Holds sample data in flat files. You'll develop and test your application using this data.
|
87
|
+
* <b>Gemfile</b> and <b>Gemfile.lock</b>: Defines how libraries are resolved with [Bundler](http://gembundler.com/).
|
88
|
+
* <b>lib</b>: Holds any code you want to use in your application but that isn't "part of" your application (like vendored libraries, Rake tasks, &c.).
|
89
|
+
* <b>log</b>: A good place to stash logs.
|
90
|
+
* <b>Rakefile</b>: Defines [Rake](http://rake.rubyforge.org/) tasks for the development, test, and deploy of your application.
|
91
|
+
* <b>spec</b>: Holds all your [RSpec](http://rspec.info/) unit tests.
|
92
|
+
* <b>spec_helper.rb</b>: Loads libraries you'll use during testing, includes spec helper libraries from Wukong.
|
93
|
+
* <b>support</b>: Holds support code for your tests.
|
94
|
+
* <b>tmp</b>: A good place to stash temporary files.
|
data/Rakefile
ADDED
data/bin/wu-deploy
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Require Wukong so we can write processors & dataflows and use
|
2
|
+
# built-in widgets.
|
3
|
+
require 'wukong'
|
4
|
+
|
5
|
+
# Require just enough of wukong-hadoop to give us changes at the
|
6
|
+
# processor level. The rest is only required when running the
|
7
|
+
# 'wu-hadoop' program. Also grab configuration since we'll need it.
|
8
|
+
require 'wukong-hadoop/extensions'
|
9
|
+
require 'wukong-hadoop/configuration'
|
10
|
+
|
11
|
+
# Grab configuration from Wonderdog.
|
12
|
+
require 'wonderdog'
|
13
|
+
|
14
|
+
module Wukong
  # Provides some helper methods for loading the assets within a
  # deploy pack at runtime and for Wukong plugins to ask questions
  # about the deploy pack they're running in.
  #
  # The module is only declared here; the files required at the
  # bottom of this file re-open it to add its methods and constants.
  module Deploy
  end
end
|
21
|
+
|
22
|
+
|
23
|
+
# Now load the rest of the deploy pack.
|
24
|
+
require 'wukong-deploy/deploy_pack'
|
25
|
+
require 'wukong-deploy/configuration'
|
26
|
+
require 'wukong-deploy/driver'
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Wukong
  module Deploy

    # Adds the deploy-specific definitions to +settings+.  This has to
    # happen *before* the settings are resolved.
    #
    # Always defines the +environment+ setting.  When the running
    # program is +wu-hadoop+, Wukong::Elasticsearch is additionally
    # asked to configure the same settings object.
    #
    # @param [Configliere::Param] settings
    # @return [Configliere::Param] the settings object that was passed in, now configured
    def self.configure settings
      settings.define(:environment,
                      :description => "The environment to run in",
                      :default     => 'development',
                      :flag        => 'e',
                      :env_var     => 'ICS_PLATFORM_ENV')

      Wukong::Elasticsearch.configure(settings) if executable == 'wu-hadoop'

      settings
    end

    # The one object holding all configuration for Wukong::Deploy.  The
    # guard keeps an already-built Configuration from being replaced
    # when this file is evaluated again.
    Configuration = configure(Configliere::Param.new) unless defined?(Configuration)
  end
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
require 'pathname'

module Wukong
  module Deploy

    # Boot the deploy pack, reading all available +settings+ and
    # rooting it at the given +path+.
    #
    # A copy of +settings+ is stored before any file is read (see
    # +pre_deploy_settings+).  Settings are then read in this order:
    # common settings, environment-specific settings, remote settings.
    #
    # @param [Configliere::Param] settings
    # @param [String] path
    def self.boot! settings, path
      @pre_deploy_settings = settings.dup
      @settings            = settings
      @root                = Pathname.new(path)
      read_common_settings
      read_environment_settings
      read_remote_settings
    end

    # Return the root directory of this deploy pack.
    #
    # Only set once +boot!+ has been called.
    #
    # @return [Pathname]
    def self.root
      @root
    end

    # Recursively require each Ruby file within the directory +glob+,
    # which is interpreted relative to the deploy pack's root.
    #
    # Files are required in sorted path order so that load order does
    # not depend on the order the filesystem lists them in.
    #
    # @example Requiring all .rb files anywhere within /lib/my_lib
    #
    #   Wukong::Deploy.require_recursive("lib/my_lib")
    #
    # @param [String] glob the directory (or glob prefix) to search beneath
    def self.require_recursive glob
      Dir[root.join("#{glob}/**/*.rb").to_s].sort.each { |path| require(path) }
    end

    # Return the name of the executable program currently running.
    #
    # @return [String]
    def self.executable
      File.basename($0)
    end

    # Return the current environment the deploy pack is in.
    #
    # @return [String]
    def self.environment
      settings[:environment]
    end

    # Return the deploy pack's own settings.
    #
    # @return [Configliere::Param]
    def self.settings
      @settings
    end

    # Return the settings the deploy pack had *before* it booted
    # itself -- these are used to pass to other tools when invoking
    # them, since they will read the deploy pack settings themselves
    # anyway.
    #
    # @return [Configliere::Param]
    def self.pre_deploy_settings
      @pre_deploy_settings
    end

    # The directory this deploy pack keeps local, sample data in.
    #
    # @return [Pathname]
    def self.data_dir
      root.join('data')
    end

    # The directory this deploy pack uses for configuration files.
    #
    # @return [Pathname]
    def self.config_dir
      root.join('config')
    end

    # The directory this deploy pack uses for temporary files.
    #
    # @return [Pathname]
    def self.tmp_dir
      root.join('tmp')
    end

    # The directory this deploy pack uses for logs.
    #
    # @return [Pathname]
    def self.log_dir
      root.join('log')
    end

    # The directory this deploy pack puts all its application code in.
    #
    # @return [Pathname]
    def self.app_dir
      root.join('app')
    end

    # The logger for this deploy pack.
    #
    # Uses the +Log+ constant when one is defined; otherwise falls back
    # to a standard library Logger writing to standard error.
    #
    # @return [Logger]
    def self.logger
      return @logger if @logger
      # FIXME -- want to use the Wukong logger here...
      require 'logger'
      # Logger.new needs a log device; calling it with no arguments
      # raises ArgumentError.
      @logger = defined?(Log) ? Log : Logger.new($stderr)
    end

    # The methods below are internal helpers used by +boot!+.  They
    # remain callable from outside: a bare +private+ does not apply to
    # methods defined with +def self.+, so none is used here.

    # Read settings common across all environments from
    # config/settings.yml.
    def self.read_common_settings
      read_settings_from_file(settings, config_dir.join("settings.yml"))
    end

    # Read settings unique to this deploy pack's current environment
    # from config/environments/ENVIRONMENT.yml.
    def self.read_environment_settings
      read_settings_from_file(settings, config_dir.join("environments", "#{environment}.yml"))
    end

    # Update +settings+ with the configuration at the given +path+.
    #
    # A path that is missing, unreadable, or not a regular file is
    # skipped with a warning rather than an error.
    #
    # @param [Configliere::Param] settings
    # @param [String, Pathname] path
    def self.read_settings_from_file settings, path
      if File.exist?(path) && File.readable?(path) && File.file?(path)
        settings.read(path)
      else
        logger.warn("Cannot read settings file at #{path}.")
      end
    end

    # Read remote settings.
    #
    # FIXME -- not implemented yet.
    def self.read_remote_settings
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Wukong
  module Deploy
    # Dispatches a deploy pack sub-command.  The first element of
    # +args+ names the command (+new+ or +console+); anything else
    # prints help and exits.
    class Driver

      attr_accessor :settings, :args

      # @param [Object] settings settings object; only +dump_help+ is called on it here
      # @param [Array<String>] args the command name followed by its arguments
      def initialize settings, *args
        self.settings = settings
        self.args     = args
      end

      # Build a driver and run it.  A Wukong::Error is reported on
      # standard error and ends the process with exit status 1.
      #
      # @param [Object] settings
      # @param [Array<String>] args
      def self.run(settings, *args)
        new(settings, *args).run!
      rescue Wukong::Error => e
        $stderr.puts(e.message)
        exit(1)
      end

      # Execute the command named by the first argument.
      #
      # * +new+ requires a second argument, the path of the deploy pack
      #   to create (expanded against the current directory), and hands
      #   it to the Templater.
      # * +console+ starts a Wukong::Deploy::Console.
      # * anything else dumps help and exits with status 2.
      #
      # The supporting files are only loaded when their command is used.
      def run!
        command, target = args

        case command
        when 'new'
          require_relative('templater')
          if target.nil? || target.blank?
            raise Error.new("Must provide a path to the root of the deploy pack you want to create")
          end
          Templater.new(File.expand_path(target, Dir.pwd)).create
        when 'console'
          require_relative('console')
          Wukong::Deploy::Console.new.run!
        else
          settings.dump_help
          exit(2)
        end
      end

    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module Wukong
  module Deploy
    # Describes the layout of a deploy pack rooted at a directory:
    # which directories it consists of and which files are generated
    # from templates.  Nothing here touches the filesystem.
    class Repo

      TOP_LEVEL_DIR_NAMES = %w[app config data lib log script spec tmp]

      attr_reader :root

      # @param [String, Pathname] root the root directory of the deploy pack
      def initialize root
        @root = Pathname.new(root)
      end

      # Defines one accessor per top-level directory (+app_dir+,
      # +config_dir+, ...).  Each returns that directory beneath the
      # root, with any given path segments appended.
      TOP_LEVEL_DIR_NAMES.each do |dir_name|
        define_method("#{dir_name}_dir") { |*segments| root.join(dir_name, *segments) }
      end

      # Every directory that makes up a deploy pack, starting with the
      # root itself.
      #
      # @return [Array<Pathname>]
      def dirs_to_create
        app_dirs    = %w[models processors flows jobs].map { |sub| app_dir(sub) }
        config_dirs = %w[environments initializers].map { |sub| config_dir(sub) }
        plain_dirs  = [data_dir, lib_dir, log_dir, script_dir]
        spec_dirs   = %w[models processors flows jobs support].map { |sub| spec_dir(sub) }

        [root] + app_dirs + config_dirs + plain_dirs + spec_dirs + [tmp_dir]
      end

      # Paths, relative to the root, of the files rendered from
      # templates.
      #
      # @return [Array<String>]
      def files_to_template
        [
          'config/boot.rb',
          'config/application.rb',
          'config/environment.rb',
          'config/settings.yml',
          'config/environments/development.yml',
          'config/environments/test.yml',
          'config/environments/production.yml',
          'Gemfile',
          'Rakefile',
          'README.md',
          'spec/spec_helper.rb',
        ]
      end

    end
  end
end
|
66
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
# Housekeeping for the locally installed bundle.
namespace :bundler do
  desc "Remove local bundle repository"
  task :clean do
    log = Wukong::Deploy.logger
    log.info "Removing local bundle at /.bundle and /bundle"
    # Paths are relative to the directory rake is running in.
    sh "rm -rf .bundle bundle"
    log.info "Make sure to run 'bundle install --standalone' to re-create the bundle"
  end
end
|
12
|
+
|
13
|
+
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'erubis'
|
4
|
+
|
5
|
+
require_relative('repo')
|
6
|
+
|
7
|
+
module Wukong
  module Deploy
    # Creates a deploy pack on disk: its directories, the files
    # rendered from the bundled templates, placeholder .gitkeep files
    # and a .gitignore.  File operations are echoed because the verbose
    # FileUtils variant is mixed in.
    class Templater

      include FileUtils::Verbose

      attr_accessor :repo, :options

      # @param [String, Pathname] root where the deploy pack should live
      # @param [Hash] options stored on the instance; not read by this class itself
      def initialize root, options={}
        self.repo    = Repo.new(root)
        self.options = options
      end

      # Run every creation step, in order.
      def create
        create_dirs
        create_templates
        create_gitkeeps
        create_gitignore
      end

      # Make each directory of the repo, including missing parents.
      def create_dirs
        repo.dirs_to_create.each do |directory|
          mkdir_p(directory)
        end
      end

      # Render every templated file into the repo.  The template for a
      # file lives at the same relative path with an .erb suffix.
      def create_templates
        repo.files_to_template.each do |relative_path|
          template = templates_dir.join("#{relative_path}.erb")
          target   = repo.root.join(relative_path)
          create_template(template, target)
        end
      end

      # Render the template at +input_path+ and write the result to
      # +output_path+, overwriting any existing file.  Prints whether
      # the file is being created or modified.
      #
      # @param [String, Pathname] input_path the template to read
      # @param [String, Pathname] output_path the file to write
      # @param [Hash] binding passed to the template when it is rendered
      def create_template input_path, output_path, binding={}
        rendered = Erubis::Eruby.new(File.read(input_path)).result(binding)
        verb     = File.exist?(output_path) ? 'modify' : 'create'
        puts "#{verb} #{output_path}"
        File.open(output_path, 'w') { |file| file.puts(rendered) }
      end

      # Drop a .gitkeep into every repo directory that has no visible
      # entries (the '*' glob does not match dotfiles).
      def create_gitkeeps
        repo.dirs_to_create.each do |directory|
          next unless Dir[File.join(directory, '*')].empty?
          touch(File.join(directory, '.gitkeep'))
        end
      end

      # Write the repo's .gitignore from the 'gitignore' template.
      def create_gitignore
        create_template(templates_dir.join('gitignore'), repo.root.join('.gitignore'))
      end

      # The directory holding the templates, three levels up from this
      # file and then into +templates+.
      #
      # @return [Pathname]
      def templates_dir
        @templates_dir ||= Pathname.new(File.expand_path('../../../templates', __FILE__))
      end

    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Use the public [RubyGems server](https://rubygems.org/).
#
# The URL is written out in full because Bundler has deprecated the
# old `source :rubygems` shorthand; the explicit HTTPS address also
# keeps gem downloads encrypted.
source 'https://rubygems.org'

# = Application
#
# The following dependencies are for your application.
#
# Does one of your processors require the UserAgent library because
# it's parsing HTTP User-Agent strings?  Just insert
#
#   gem 'useragent'
#
# here and then remember to run
#
#   $ bundle install --standalone
#
# Here's a link to the [Bundler documentation](http://gembundler.com/)
# for your convenience.






#
# = Framework
#
# The following libraries are part of the deploy pack framework.  You
# shouldn't need to change these.
#

gem 'wukong-deploy', :git => 'https://github.com/infochimps-labs/wukong-deploy'
gem 'wukong', :git => 'https://github.com/infochimps-labs/wukong', :branch => '3.0.0'
gem 'wukong-hadoop', :git => 'https://github.com/infochimps-labs/wukong-hadoop'
gem 'wonderdog', :git => 'https://github.com/infochimps-labs/wonderdog'
gem 'vayacondios-client', :git => 'https://github.com/infochimps-labs/vayacondios'

# Only needed while developing and testing the deploy pack.
group :development do
  gem 'rspec'
end
|
@@ -0,0 +1 @@
|
|
1
|
+
Welcome to your new deploy pack.
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# = config/application.rb
#
# Loads the deploy pack application and all supporting libraries
# including Wukong and any of its plugins.
#
# The load order is as follows:
#
# * [Wukong-Deploy](http://github.com/infochimps-labs/wukong-deploy) which loads all Wukong and Wukong plugins.
# * Any code you put in the `config/initializers` directory.  This is a good place to configure additional libraries to support your application.
# * Your application code itself, in the following order
#   * models
#   * processors
#   * flows
#   * jobs
#   * anything else
#
# Within each of these steps files are required in sorted path order,
# so the load order is the same on every machine.
#
# Code you want available but that isn't part of your application
# itself belongs in the `lib` directory.

#
# == Deploy Pack Framework
#

# Boot up first
require_relative("boot")

# Require the Wukong-Deploy plugin which will itself require and
# configure core Wukong and other plugins.
require 'wukong-deploy'

# Load initializers containing any requires or configuration code
# needed by the app.  The glob is expanded to an absolute path (like
# the application globs below) so that `require` never receives a
# relative path, which it would look up on the load path instead.
Dir[File.expand_path('../initializers/**/*.rb', __FILE__)].sort.each { |path| require(path) }

#
# == Model Support
#
# We're going to use Gorillib, Infochimps' own library of useful
# methods and Ruby hacks.
require 'gorillib'
require 'gorillib/model/serialization'
require 'gorillib/object/blank'

#
# But you could use a more Rails-y support stack
#
# require 'active_support'

#
# Or DataMapper
#
# require 'dm-core'

#
# == Application Structure
#
#
# By default, we try not to be clever and just load *everything* in
# app/.  If you have a lot of application code, this may be slow on
# startup and you may want to only load common classes or define
# autoloads or whatever you want to do.
#
# The final catch-all glob matches the files above again; `require`
# skips anything that is already loaded.
Dir[File.expand_path('../../app/models/**/*.rb', __FILE__)].sort.each { |path| require(path) }
Dir[File.expand_path('../../app/processors/**/*.rb', __FILE__)].sort.each { |path| require(path) }
Dir[File.expand_path('../../app/flows/**/*.rb', __FILE__)].sort.each { |path| require(path) }
Dir[File.expand_path('../../app/jobs/**/*.rb', __FILE__)].sort.each { |path| require(path) }
Dir[File.expand_path('../../app/**/*.rb', __FILE__)].sort.each { |path| require(path) }
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# = config/boot.rb
#
# Activates the deploy pack's bundled gems before anything else is
# loaded.
#
# A standalone bundle installation is used because it makes it easier
# for non-Ruby code in the underlying frameworks to interact with the
# deploy pack.
#
# Install the bundle locally before booting:
#
# @example Install a standalone bundle of gems
#
#   $ bundle install --standalone

bundler_setup = File.expand_path("../../bundle/bundler/setup.rb", __FILE__)
begin
  require bundler_setup
rescue LoadError
  # The setup file could not be loaded: explain how to create the
  # bundle and stop with a failing exit status.
  $stderr.puts("Failed to require \"#{bundler_setup}\". Try running\n\n bundle install --standalone\n\n")
  exit(1)
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
# = config/environment.rb
#
# Defines the runtime environment of your application: loads the
# application file next to this one, then boots the deploy pack with
# its configuration, rooted at the directory above config/.

require_relative("application")

deploy_pack_root = File.expand_path("../../", __FILE__)
Wukong::Deploy.boot!(Wukong::Deploy::Configuration, deploy_pack_root)
|
@@ -0,0 +1,54 @@
|
|
1
|
+
---
|
2
|
+
|
3
|
+
# = config/settings.yml
|
4
|
+
#
|
5
|
+
# Any settings defined in this file will be merged into a global
|
6
|
+
# Settings object within any of the wu-tools which run inside this
|
7
|
+
# deploy pack.
|
8
|
+
#
|
9
|
+
# Settings will additionally be loaded from a file named after the
|
10
|
+
# ICS_PLATFORM_ENV environment variable and will override those
|
11
|
+
# declared here. The 'development' environment is the default
|
12
|
+
# environment loaded.
|
13
|
+
#
|
14
|
+
# Any settings specified on the command line when invoking wu-tools
|
15
|
+
# will take precedence over settings within any of these files.
|
16
|
+
#
|
17
|
+
|
18
|
+
|
19
|
+
#
|
20
|
+
# == Notifier
|
21
|
+
#
|
22
|
+
# Notifiers are used within a Processor for out of band event
|
23
|
+
# processing.
|
24
|
+
notifier:
|
25
|
+
|
26
|
+
# The type of notifier to use is better set on a per-environment
|
27
|
+
# basis. See the corresponding YAML files in config/environments.
|
28
|
+
# type: log
|
29
|
+
|
30
|
+
#
|
31
|
+
# === Log notifier
|
32
|
+
#
|
33
|
+
# The log notifier will print each event to the log.
|
34
|
+
|
35
|
+
# A path to a log file.
|
36
|
+
# log: log/wu.log
|
37
|
+
|
38
|
+
# The priority level of the log
|
39
|
+
# level: info
|
40
|
+
|
41
|
+
#
|
42
|
+
# === HTTP notifier
|
43
|
+
#
|
44
|
+
# The HTTP notifier will send events over HTTP to an instance of the
|
45
|
+
# Infochimps Platform API
|
46
|
+
|
47
|
+
# The host to send each event to. Your host is typically determined
|
48
|
+
# by the name of your platform account.
|
49
|
+
# host: api.INSERT_ACCOUNT_NAME_HERE.chimpy.us
|
50
|
+
|
51
|
+
# The port to send each event to.
|
52
|
+
# port: 9000
|
53
|
+
|
54
|
+
# You can put processor-specific settings within this file as well.
|
data/templates/gitignore
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# version control
|
2
|
+
.svn
|
3
|
+
.hg
|
4
|
+
.bzr
|
5
|
+
|
6
|
+
# editors
|
7
|
+
\#*
|
8
|
+
.#*
|
9
|
+
\.#*
|
10
|
+
*~
|
11
|
+
*.tmproj
|
12
|
+
*._flymake.*
|
13
|
+
*.project
|
14
|
+
|
15
|
+
# os x
|
16
|
+
.DS_Store
|
17
|
+
Icon?
|
18
|
+
|
19
|
+
# backup
|
20
|
+
.bak
|
21
|
+
|
22
|
+
# tools
|
23
|
+
TAGS*
|
24
|
+
nohup.out
|
25
|
+
a.out
|
26
|
+
*.pyc
|
27
|
+
*.so
|
28
|
+
*.stackdump
|
29
|
+
.yardoc
|
30
|
+
|
31
|
+
# commonly ignored files
|
32
|
+
/**/*DO_NOT_VERSION*
|
33
|
+
/**/log/*
|
34
|
+
/**/tmp/*
|
35
|
+
/doc/*
|
36
|
+
/pkg/*
|
37
|
+
|
38
|
+
.bundle
|
39
|
+
bundle/*
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Specs run in the 'test' environment unless one was already chosen.
ENV["ICS_PLATFORM_ENV"] ||= "test"

# Load the deploy pack's environment.
require_relative('../config/environment')

# Require spec helpers from Wukong
require 'wukong/spec_helpers'

# Load every Ruby file beneath spec/support.
support_glob = File.expand_path('../support/**/*.rb', __FILE__)
Dir.glob(support_glob).each { |support_file| require support_file }

RSpec.configure do |config|
  config.mock_with :rspec
  # NOTE(review): this is a bare `include`, not `config.include`, so it
  # appears to mix the helpers in at the top level rather than only
  # into example groups -- confirm that is intended before changing it.
  include Wukong::SpecHelpers
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/wukong-deploy/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |gem|
|
5
|
+
gem.name = 'wukong-deploy'
|
6
|
+
gem.homepage = 'https://github.com/infochimps-labs/wukong-deploy'
|
7
|
+
gem.licenses = ["Apache 2.0"]
|
8
|
+
gem.email = 'coders@infochimps.com'
|
9
|
+
gem.authors = ['Infochimps', 'Dhruv Bansal', 'Travis Dempsey']
|
10
|
+
gem.version = Wukong::Deploy::VERSION
|
11
|
+
|
12
|
+
gem.summary = 'Defines the deploy pack framework used by the Infochimps Platform'
|
13
|
+
gem.description = <<-EOF
|
14
|
+
The Infochimps Platform is an end-to-end, managed solution for
|
15
|
+
building Big Data applications. It integrates best-of-breed
|
16
|
+
technologies like Hadoop, Storm, Kafka, MongoDB, ElasticSearch,
|
17
|
+
HBase, &c. and provides simple interfaces for accessing these
|
18
|
+
powerful tools.
|
19
|
+
|
20
|
+
Computation, analytics, scripting, &c. are all handled by Wukong
|
21
|
+
within the platform. Wukong is an abstract framework for defining
|
22
|
+
computations on data. Wukong processors and flows can run in many
|
23
|
+
different execution contexts including:
|
24
|
+
|
25
|
+
* locally on the command-line for testing or development purposes
|
26
|
+
* as a Hadoop mapper or reducer for batch analytics or ETL
|
27
|
+
* within Storm as part of a real-time data flow
|
28
|
+
|
29
|
+
The Infochimps Platform uses the concept of a deploy pack for
|
30
|
+
developers to develop all their processors, flows, and jobs within.
|
31
|
+
EOF
|
32
|
+
|
33
|
+
gem.files = `git ls-files`.split("\n")
|
34
|
+
gem.executables = ['wu-deploy']
|
35
|
+
gem.test_files = gem.files.grep(/^spec/)
|
36
|
+
gem.require_paths = ['lib']
|
37
|
+
|
38
|
+
gem.add_dependency('wukong', '3.0.0.pre2')
|
39
|
+
gem.add_dependency('wukong-hadoop')
|
40
|
+
gem.add_dependency('wonderdog')
|
41
|
+
gem.add_dependency('erubis')
|
42
|
+
|
43
|
+
gem.add_dependency('rake', '~> 0.9')
|
44
|
+
gem.add_development_dependency 'rspec', '~> 2'
|
45
|
+
|
46
|
+
end
|
47
|
+
|
metadata
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wukong-deploy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Infochimps
|
9
|
+
- Dhruv Bansal
|
10
|
+
- Travis Dempsey
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2012-12-01 00:00:00.000000000 Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: wukong
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - '='
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 3.0.0.pre2
|
24
|
+
type: :runtime
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: !ruby/object:Gem::Requirement
|
27
|
+
none: false
|
28
|
+
requirements:
|
29
|
+
- - '='
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: 3.0.0.pre2
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: wukong-hadoop
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
none: false
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
type: :runtime
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: wonderdog
|
50
|
+
requirement: !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ! '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
type: :runtime
|
57
|
+
prerelease: false
|
58
|
+
version_requirements: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
- !ruby/object:Gem::Dependency
|
65
|
+
name: erubis
|
66
|
+
requirement: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ! '>='
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
type: :runtime
|
73
|
+
prerelease: false
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: rake
|
82
|
+
requirement: !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ~>
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0.9'
|
88
|
+
type: :runtime
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ~>
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0.9'
|
96
|
+
- !ruby/object:Gem::Dependency
|
97
|
+
name: rspec
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ~>
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '2'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ~>
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '2'
|
112
|
+
description: ! " The Infochimps Platform is an end-to-end, managed solution for\n
|
113
|
+
\ building Big Data applications. It integrates best-of-breed\n technologies like
|
114
|
+
Hadoop, Storm, Kafka, MongoDB, ElasticSearch,\n HBase, &c. and provides simple
|
115
|
+
interfaces for accessing these\n powerful tools.\n\n Computation, analytics, scripting,
|
116
|
+
&c. are all handled by Wukong\n within the platform. Wukong is an abstract framework
|
117
|
+
for defining\n computations on data. Wukong processors and flows can run in many\n
|
118
|
+
\ different execution contexts including:\n\n * locally on the command-line for
|
119
|
+
testing or development purposes\n * as a Hadoop mapper or reducer for batch analytics
|
120
|
+
or ETL\n * within Storm as part of a real-time data flow\n\n The Infochimps
|
121
|
+
Platform uses the concept of a deploy pack for\n developers to develop all their
|
122
|
+
processors, flows, and jobs within.\n"
|
123
|
+
email: coders@infochimps.com
|
124
|
+
executables:
|
125
|
+
- wu-deploy
|
126
|
+
extensions: []
|
127
|
+
extra_rdoc_files: []
|
128
|
+
files:
|
129
|
+
- .gitignore
|
130
|
+
- .rspec
|
131
|
+
- Gemfile
|
132
|
+
- README.md
|
133
|
+
- Rakefile
|
134
|
+
- bin/wu-deploy
|
135
|
+
- lib/wukong-deploy.rb
|
136
|
+
- lib/wukong-deploy/configuration.rb
|
137
|
+
- lib/wukong-deploy/console.rb
|
138
|
+
- lib/wukong-deploy/deploy_pack.rb
|
139
|
+
- lib/wukong-deploy/driver.rb
|
140
|
+
- lib/wukong-deploy/repo.rb
|
141
|
+
- lib/wukong-deploy/tasks.rb
|
142
|
+
- lib/wukong-deploy/templater.rb
|
143
|
+
- lib/wukong-deploy/version.rb
|
144
|
+
- templates/Gemfile.erb
|
145
|
+
- templates/README.md.erb
|
146
|
+
- templates/Rakefile.erb
|
147
|
+
- templates/config/application.rb.erb
|
148
|
+
- templates/config/boot.rb.erb
|
149
|
+
- templates/config/environment.rb.erb
|
150
|
+
- templates/config/environments/development.yml.erb
|
151
|
+
- templates/config/environments/production.yml.erb
|
152
|
+
- templates/config/environments/test.yml.erb
|
153
|
+
- templates/config/settings.yml.erb
|
154
|
+
- templates/gitignore
|
155
|
+
- templates/spec/spec_helper.rb.erb
|
156
|
+
- wukong-deploy.gemspec
|
157
|
+
homepage: https://github.com/infochimps-labs/wukong-deploy
|
158
|
+
licenses:
|
159
|
+
- Apache 2.0
|
160
|
+
post_install_message:
|
161
|
+
rdoc_options: []
|
162
|
+
require_paths:
|
163
|
+
- lib
|
164
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
165
|
+
none: false
|
166
|
+
requirements:
|
167
|
+
- - ! '>='
|
168
|
+
- !ruby/object:Gem::Version
|
169
|
+
version: '0'
|
170
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
|
+
none: false
|
172
|
+
requirements:
|
173
|
+
- - ! '>='
|
174
|
+
- !ruby/object:Gem::Version
|
175
|
+
version: '0'
|
176
|
+
requirements: []
|
177
|
+
rubyforge_project:
|
178
|
+
rubygems_version: 1.8.23
|
179
|
+
signing_key:
|
180
|
+
specification_version: 3
|
181
|
+
summary: Defines the deploy pack framework used by the Infochimps Platform
|
182
|
+
test_files: []
|
183
|
+
has_rdoc:
|