pipely 0.1.5 → 0.2.0
- checksums.yaml +4 -4
- data/lib/pipely/build.rb +31 -0
- data/lib/pipely/build/daily_scheduler.rb +37 -0
- data/lib/pipely/build/definition.rb +26 -0
- data/lib/pipely/build/environment_config.rb +34 -0
- data/lib/pipely/build/right_now_scheduler.rb +27 -0
- data/lib/pipely/build/s3_path_builder.rb +52 -0
- data/lib/pipely/build/template.rb +43 -0
- data/lib/pipely/build/template_helpers.rb +53 -0
- data/lib/pipely/deploy.rb +1 -0
- data/lib/pipely/deploy/client.rb +79 -0
- data/lib/pipely/tasks.rb +9 -0
- data/lib/pipely/tasks/definition.rb +69 -0
- data/lib/pipely/tasks/deploy.rb +58 -0
- data/lib/pipely/tasks/graph.rb +75 -0
- data/lib/pipely/tasks/upload_steps.rb +86 -0
- data/lib/pipely/version.rb +1 -1
- data/spec/lib/pipely/build/daily_scheduler_spec.rb +33 -0
- data/spec/lib/pipely/build/right_now_scheduler_spec.rb +19 -0
- data/spec/lib/pipely/build/s3_path_builder_spec.rb +46 -0
- data/spec/lib/pipely/build/template_spec.rb +85 -0
- data/spec/lib/pipely/build_spec.rb +3 -0
- data/spec/lib/pipely/deploy/client_spec.rb +28 -0
- data/spec/spec_helper.rb +9 -0
- metadata +75 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e753a1c8b9207a58a13e7f4454b4d777ee2b42ba
+  data.tar.gz: af3d1bbe539a028bcbc58c5e6f1ca2df908f6a41
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 56fdf912f37ebb81a7f17a7a8a6c5686228165045d5917a48f12621b4a83357b7bb73fcb0e3562c0a0df9f53ec8d5131d484056d0f8283a819b1f04daf0c46f9
+  data.tar.gz: 6442124a52915c429e9a2e0ada617b687c10c5db887b2c84f1b77f3a96ec1e4e7a0cd789240e594f82807ab5821ebf1f973960654a35a7f29c7375cc4d2d1708
data/lib/pipely/build.rb
ADDED
@@ -0,0 +1,31 @@
+require 'pipely/build/definition'
+require 'pipely/build/template'
+require 'pipely/build/daily_scheduler'
+require 'pipely/build/right_now_scheduler'
+require 'pipely/build/s3_path_builder'
+require 'pipely/build/environment_config'
+
+module Pipely
+
+  # Turn Templates+config into a deployable Definition.
+  #
+  module Build
+
+    def self.build_definition(template, environment, config_path)
+      env = environment.to_sym
+      config = EnvironmentConfig.load(config_path, env)
+
+      case environment.to_sym
+      when :production
+        s3_prefix = "production/#{config[:namespace]}"
+        scheduler = DailyScheduler.new
+      when :staging
+        s3_prefix = "staging/#{`whoami`.strip}/#{config[:namespace]}"
+        scheduler = RightNowScheduler.new
+      end
+
+      Definition.new(template, env, s3_prefix, scheduler, config)
+    end
+
+  end
+end
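
For context on the pieces below, a minimal usage sketch of this new entry point; the template path, config path, and the YAML keys it implies (name, namespace, s3) are illustrative assumptions, not mandated names:

require 'pipely/build'

# 'definition.json.erb' and 'config/pipeline.yml' are hypothetical; the YAML
# needs per-environment blocks carrying the keys this code reads
# (:name, :namespace, and an :s3 block for the S3PathBuilder below).
template = Pipely::Build::Template.new(File.read('definition.json.erb'))
definition = Pipely::Build.build_definition(template, 'production', 'config/pipeline.yml')

definition.pipeline_name  # => value of the config's 'name' key
definition.to_json        # => rendered pipeline definition JSON, ready to deploy
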

data/lib/pipely/build/daily_scheduler.rb
ADDED
@@ -0,0 +1,37 @@
+module Pipely
+  module Build
+
+    # Compute schedule attributes for a pipeline that runs once-a-day at a set
+    # time.
+    #
+    class DailyScheduler
+
+      def initialize(start_time="11:00:00")
+        @start_time = start_time
+      end
+
+      def period
+        '24 hours'
+      end
+
+      def start_date_time
+        date = Date.today
+
+        # if start_time already happened today, wait for tomorrow's start_time
+        now_time = Time.now.utc.strftime('%H:%M:%S')
+        date += 1 if now_time >= @start_time
+
+        date.strftime("%Y-%m-%dT#{@start_time}")
+      end
+
+      def to_hash
+        {
+          :period => period,
+          :start_date_time => start_date_time
+        }
+      end
+
+    end
+
+  end
+end
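
A quick sketch of the hash this produces; the date shown assumes the code runs before 11:00:00 UTC on 2013-12-31, otherwise it rolls to the next day:

scheduler = Pipely::Build::DailyScheduler.new
scheduler.period           # => "24 hours"
scheduler.start_date_time  # => "2013-12-31T11:00:00"
scheduler.to_hash          # => { :period => "24 hours", :start_date_time => "2013-12-31T11:00:00" }
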

data/lib/pipely/build/definition.rb
ADDED
@@ -0,0 +1,26 @@
+module Pipely
+  module Build
+
+    # Represent a pipeline definition, built from a Template and some config.
+    #
+    class Definition < Struct.new(:template,:env,:s3_prefix,:scheduler,:config)
+      def pipeline_name
+        config[:name]
+      end
+
+      def s3_path_builder
+        S3PathBuilder.new(config[:s3].merge(prefix: s3_prefix))
+      end
+
+      def to_json
+        template.apply_config(:environment => env)
+        template.apply_config(config)
+        template.apply_config(s3_path_builder.to_hash)
+        template.apply_config(scheduler.to_hash)
+
+        template.to_json
+      end
+    end
+
+  end
+end

data/lib/pipely/build/environment_config.rb
ADDED
@@ -0,0 +1,34 @@
+require 'yaml'
+
+module Pipely
+  module Build
+
+    # Work with YAML config files that contain parallel configs for various
+    # environments.
+    #
+    class EnvironmentConfig < Hash
+
+      def self.load(filename, environment)
+        raw = YAML.load_file(filename)[environment.to_s]
+        load_from_hash(raw)
+      end
+
+      def self.load_from_hash(attributes)
+        config = new
+
+        attributes.each do |k, v|
+          case v
+          when Hash
+            config[k.to_sym] = load_from_hash(v)
+          else
+            config[k.to_sym] = v.clone
+          end
+        end
+
+        config
+      end
+
+    end
+
+  end
+end

data/lib/pipely/build/right_now_scheduler.rb
ADDED
@@ -0,0 +1,27 @@
+module Pipely
+  module Build
+
+    # Compute schedule attributes for a pipeline that should run immediately
+    # after being deployed.
+    #
+    class RightNowScheduler
+
+      def period
+        '12 hours'
+      end
+
+      def start_date_time
+        Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S")
+      end
+
+      def to_hash
+        {
+          :period => period,
+          :start_date_time => start_date_time
+        }
+      end
+
+    end
+
+  end
+end

data/lib/pipely/build/s3_path_builder.rb
ADDED
@@ -0,0 +1,52 @@
+module Pipely
+  module Build
+
+    # Builds paths to assets, logs, and steps that are on S3.
+    #
+    class S3PathBuilder
+
+      attr_reader :assets_bucket, :logs_bucket, :steps_bucket
+
+      START_TIME = "\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}"
+
+      def initialize(options)
+        @assets_bucket = options[:assets]
+        @logs_bucket = options[:logs]
+        @steps_bucket = options[:steps]
+        @s3prefix = options[:prefix]
+      end
+
+      def s3_log_prefix
+        "s3://#{@logs_bucket}/#{@s3prefix}/#{START_TIME}"
+      end
+
+      def s3_step_prefix
+        "s3://#{@steps_bucket}/#{@s3prefix}"
+      end
+
+      def s3n_step_prefix
+        "s3n://#{@steps_bucket}/#{@s3prefix}"
+      end
+
+      def s3_asset_prefix
+        "s3://#{@assets_bucket}/#{@s3prefix}/#{START_TIME}"
+      end
+
+      def s3n_asset_prefix
+        "s3n://#{@assets_bucket}/#{@s3prefix}/#{START_TIME}"
+      end
+
+      def to_hash
+        {
+          :s3_log_prefix => s3_log_prefix,
+          :s3_step_prefix => s3_step_prefix,
+          :s3n_step_prefix => s3n_step_prefix,
+          :s3_asset_prefix => s3_asset_prefix,
+          :s3n_asset_prefix => s3n_asset_prefix
+        }
+      end
+
+    end
+
+  end
+end
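
In use (bucket names are placeholders), note that START_TIME escapes its interpolation marker, so the expression survives Ruby string interpolation and is evaluated later by AWS Data Pipeline against @scheduledStartTime:

builder = Pipely::Build::S3PathBuilder.new(
  assets: 'asset-bucket',
  logs: 'log-bucket',
  steps: 'step-bucket',
  prefix: 'production/my_pipeline'
)

builder.s3_step_prefix
# => "s3://step-bucket/production/my_pipeline"
builder.s3_log_prefix
# => "s3://log-bucket/production/my_pipeline/#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}"
#    (the #{...} is literal output here, resolved at run time by Data Pipeline, not by Ruby)
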

data/lib/pipely/build/template.rb
ADDED
@@ -0,0 +1,43 @@
+require 'active_support/core_ext/hash'
+require 'erb'
+
+require 'pipely/build/template_helpers'
+
+module Pipely
+  module Build
+
+    # An ERB template that can be interpolated with config hashes to render a
+    # deployable pipeline definition.
+    #
+    class Template
+      include TemplateHelpers
+
+      def initialize(source)
+        @source = source
+        @config = {}
+      end
+
+      def apply_config(attributes)
+        @config.merge!(attributes.symbolize_keys)
+      end
+
+      def to_json
+        ERB.new(@source).result(binding)
+      end
+
+      def respond_to_missing(method_name, include_private=false)
+        @config.keys.include?(method_name.to_s) || super
+      end
+
+      def method_missing(method_name, *args, &block)
+        if @config.keys.include?(method_name)
+          @config[method_name]
+        else
+          super
+        end
+      end
+
+    end
+
+  end
+end
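
A small sketch of the render cycle: apply_config merges keys into @config, and method_missing exposes each key to the ERB source at render time (the template string is illustrative):

template = Pipely::Build::Template.new('{"name": "<%= name %>-<%= environment %>"}')
template.apply_config(name: 'MyPipeline')
template.apply_config(environment: :production)
template.to_json  # => '{"name": "MyPipeline-production"}'
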

data/lib/pipely/build/template_helpers.rb
ADDED
@@ -0,0 +1,53 @@
+module Pipely
+  module Build
+
+    # Helper methods used by ERB templates.
+    #
+    module TemplateHelpers
+
+      def s3_asset_path(path)
+        "#{s3_asset_prefix if '/' == path[0]}#{path}"
+      end
+
+      def s3n_asset_path(path)
+        "#{s3n_asset_prefix if '/' == path[0]}#{path}"
+      end
+
+      def s3n_step_path(path)
+        "#{s3n_step_prefix if '/' == path[0]}#{path}"
+      end
+
+      def streaming_hadoop_step(options)
+        parts = [ '/home/hadoop/contrib/streaming/hadoop-streaming.jar' ]
+
+        Array(options[:input]).each do |input|
+          parts += [ '-input', s3n_asset_path(input) ]
+        end
+
+        Array(options[:output]).each do |output|
+          parts += ['-output', s3_asset_path(output) ]
+        end
+
+        Array(options[:mapper]).each do |mapper|
+          parts += ['-mapper', s3n_step_path(mapper) ]
+        end
+
+        Array(options[:reducer]).each do |reducer|
+          parts += ['-reducer', s3n_step_path(reducer) ]
+        end
+
+        Array(options[:cache_file]).each do |cache_file|
+          parts += ['-cacheFile', s3n_asset_path(cache_file)]
+        end
+
+        (options[:env] || {}).each do |name, value|
+          parts += ['-cmdenv', "#{name}=#{value}"]
+        end
+
+        parts.join(',')
+      end
+
+    end
+
+  end
+end
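
These helpers are meant to be called from inside the ERB source. A hypothetical template fragment (the JSON key and paths are made up):

<%# leading-slash paths get the configured S3 prefixes; bare names like the
    IdentityReducer class below pass through untouched %>
"command": "<%= streaming_hadoop_step(
  :input   => '/input/',
  :output  => '/output/',
  :mapper  => '/mapper.rb',
  :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer',
  :env     => { 'RACK_ENV' => environment }
) %>"
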

data/lib/pipely/deploy.rb
ADDED
@@ -0,0 +1 @@
+require 'pipely/deploy/client'

data/lib/pipely/deploy/client.rb
ADDED
@@ -0,0 +1,79 @@
+require 'fog'
+require 'logger'
+require 'tempfile'
+require 'uuidtools'
+
+module Pipely
+  module Deploy
+
+    # Client for managing deployment of rendered definitions.
+    #
+    class Client
+
+      # Generic error representing failure to deploy a rendered definition.
+      class PipelineDeployerError < RuntimeError; end
+
+      def initialize(log=nil)
+        @log = log || Logger.new(STDOUT)
+        @data_pipelines = Fog::AWS::DataPipeline.new
+      end
+
+      def deploy_pipeline(pipeline_name, definition)
+        # Get a list of all existing pipelines
+        pipeline_ids = existing_pipelines(pipeline_name)
+        @log.info("#{pipeline_ids.count} existing pipelines: #{pipeline_ids}")
+
+        # Create new pipeline
+        created_pipeline_id = create_pipeline(pipeline_name, definition)
+        @log.info("Created pipeline id '#{created_pipeline_id}'")
+
+        # Delete old pipelines
+        pipeline_ids.each do |pipeline_id|
+          begin
+            delete_pipeline(pipeline_id)
+            @log.info("Deleted pipeline '#{pipeline_id}'")
+
+          rescue PipelineDeployerError => error
+            @log.warn(error)
+          end
+        end
+      end
+
+      def existing_pipelines(pipeline_name)
+        ids = []
+
+        begin
+          result = Fog::AWS[:data_pipeline].list_pipelines
+
+          ids += result['pipelineIdList'].
+            select { |p| p['name'] == pipeline_name }.
+            map { |p| p['id'] }
+
+        end while (result['hasMoreResults'] && result['marker'])
+
+        ids
+      end
+
+      def create_pipeline(pipeline_name, definition)
+        definition_objects = JSON.parse(definition)['objects']
+
+        unique_id = UUIDTools::UUID.random_create
+
+        created_pipeline = @data_pipelines.pipelines.create(
+          unique_id: unique_id,
+          name: pipeline_name
+        )
+
+        created_pipeline.put(definition_objects)
+        created_pipeline.activate
+
+        created_pipeline.id
+      end
+
+      def delete_pipeline(pipeline_id)
+        @data_pipelines.pipelines.get(pipeline_id).destroy
+      end
+
+    end
+  end
+end
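
Putting the client together; the pipeline name and definition file are placeholders, and AWS credentials are assumed to come from fog's usual configuration:

require 'pipely/deploy'

client = Pipely::Deploy::Client.new
client.deploy_pipeline('MyPipeline', File.read('definitions/my_pipeline.json'))
# Logs the count of same-named pipelines found, the id of the freshly created
# and activated pipeline, and one line per old pipeline it deletes.
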
data/lib/pipely/tasks.rb
ADDED

data/lib/pipely/tasks/definition.rb
ADDED
@@ -0,0 +1,69 @@
+require 'rake'
+require 'rake/tasklib'
+require 'pipely'
+
+module Pipely
+  module Tasks
+    class Definition < ::Rake::TaskLib
+      include ::Rake::DSL if defined?(::Rake::DSL)
+
+      # Name of task.
+      #
+      # default:
+      #   :definition
+      attr_accessor :name
+
+      # Path where rendered definitions are written.
+      #
+      # default:
+      #   "definitions"
+      attr_accessor :path
+
+      # Pipeline definition instance
+      attr_accessor :definition
+
+      # Use verbose output. If this is set to true, the task will print the
+      # local and remote paths of each step file it uploads to S3.
+      #
+      # default:
+      #   true
+      attr_accessor :verbose
+
+      def initialize(*args, &task_block)
+        setup_ivars(args)
+
+        directory path
+
+        desc "Graphs the full pipeline definition using Graphviz"
+        task name => path do |_, task_args|
+          RakeFileUtils.send(:verbose, verbose) do
+            if task_block
+              task_block.call(*[self, task_args].slice(0, task_block.arity))
+            end
+
+            run_task verbose
+          end
+        end
+      end
+
+      def setup_ivars(args)
+        @name = args.shift || :definition
+        @verbose = true
+        @path = "definitions"
+      end
+
+      def run_task(verbose)
+        puts "Generating #{target_filename}" if verbose
+
+        File.open(target_filename, 'w') do |file|
+          file.write(definition.to_json)
+        end
+      end
+
+      def target_filename
+        "#{path}/#{definition.base_filename}.json"
+      end
+
+    end
+  end
+end

data/lib/pipely/tasks/deploy.rb
ADDED
@@ -0,0 +1,58 @@
+require 'rake'
+require 'rake/tasklib'
+require 'pipely/deploy'
+
+module Pipely
+  module Tasks
+    class Deploy < ::Rake::TaskLib
+      include ::Rake::DSL if defined?(::Rake::DSL)
+
+      # Name of task.
+      #
+      # default:
+      #   :deploy
+      attr_accessor :name
+
+      # Pipeline definition instance
+      attr_accessor :definition
+
+      # Use verbose output. If this is set to true, the task will print the
+      # local and remote paths of each step file it uploads to S3.
+      #
+      # default:
+      #   true
+      attr_accessor :verbose
+
+      def initialize(*args, &task_block)
+        setup_ivars(args)
+
+        desc "Deploy pipeline" unless ::Rake.application.last_comment
+
+        task name, *args do |_, task_args|
+          RakeFileUtils.send(:verbose, verbose) do
+            if task_block
+              task_block.call(*[self, task_args].slice(0, task_block.arity))
+            end
+
+            run_task verbose
+          end
+        end
+      end
+
+      def setup_ivars(args)
+        @name = args.shift || :deploy
+        @verbose = true
+      end
+
+      def run_task(verbose)
+        Rake::Task["upload_steps"].invoke
+
+        Pipely::Deploy::Client.new.deploy_pipeline(
+          definition.pipeline_name,
+          definition.to_json
+        )
+      end
+
+    end
+  end
+end

data/lib/pipely/tasks/graph.rb
ADDED
@@ -0,0 +1,75 @@
+require 'rake'
+require 'rake/tasklib'
+require 'pipely'
+
+module Pipely
+  module Tasks
+    class Graph < ::Rake::TaskLib
+      include ::Rake::DSL if defined?(::Rake::DSL)
+
+      # Name of task.
+      #
+      # default:
+      #   :graph
+      attr_accessor :name
+
+      # Path to write graph images to.
+      #
+      # default:
+      #   "graphs"
+      attr_accessor :path
+
+      # Pipeline definition instance
+      attr_accessor :definition
+
+      # Use verbose output. If this is set to true, the task will print the
+      # local and remote paths of each step file it uploads to S3.
+      #
+      # default:
+      #   true
+      attr_accessor :verbose
+
+      def initialize(*args, &task_block)
+        setup_ivars(args)
+
+        # create the `path` directory if it doesn't exist
+        directory path
+
+        namespace name do
+          task :full => path do |_, task_args|
+            RakeFileUtils.send(:verbose, verbose) do
+              if task_block
+                task_block.call(*[self, task_args].slice(0, task_block.arity))
+              end
+
+              run_task verbose
+            end
+          end
+
+          task :open => :full do
+            `open #{target_filename}`
+          end
+        end
+
+        desc "Graphs the full pipeline definition using Graphviz"
+        task name => "#{name}:full"
+      end
+
+      def setup_ivars(args)
+        @name = args.shift || :graph
+        @verbose = true
+        @path = "graphs"
+      end
+
+      def run_task(verbose)
+        puts "Generating #{target_filename}" if verbose
+        Pipely.draw(definition.to_json, target_filename)
+      end
+
+      def target_filename
+        "#{path}/#{definition.base_filename}.png"
+      end
+
+    end
+  end
+end

data/lib/pipely/tasks/upload_steps.rb
ADDED
@@ -0,0 +1,86 @@
+require 'rake'
+require 'rake/tasklib'
+require 'fog'
+
+module Pipely
+  module Tasks
+    class UploadSteps < ::Rake::TaskLib
+      include ::Rake::DSL if defined?(::Rake::DSL)
+
+      # Name of task.
+      #
+      # default:
+      #   :upload_steps
+      attr_accessor :name
+
+      # Local path to where the step files are.
+      #
+      # default:
+      #   "steps"
+      attr_accessor :local_path
+
+      # Name of S3 bucket to upload steps to.
+      attr_accessor :s3_bucket_name
+
+      # Path within S3 bucket to upload steps to.
+      attr_accessor :s3_path
+
+      # Use verbose output. If this is set to true, the task will print the
+      # local and remote paths of each step file it uploads to S3.
+      #
+      # default:
+      #   true
+      attr_accessor :verbose
+
+      def initialize(*args, &task_block)
+        setup_ivars(args)
+
+        unless ::Rake.application.last_comment
+          desc "Upload Data Pipeline steps to S3"
+        end
+
+        task name, *args do |_, task_args|
+          RakeFileUtils.send(:verbose, verbose) do
+            if task_block
+              task_block.call(*[self, task_args].slice(0, task_block.arity))
+            end
+
+            run_task verbose
+          end
+        end
+      end
+
+      def setup_ivars(args)
+        @name = args.shift || :upload_steps
+        @verbose = true
+        @local_path = "steps"
+      end
+
+      def run_task(verbose)
+        with_bucket do |directory|
+          step_files.each do |file_name|
+            dest = "#{s3_path}/#{File.basename(file_name)}"
+            puts "uploading #{dest}" if verbose
+            directory.files.create(key: dest, body: File.read(file_name))
+          end
+        end
+      end
+
+      private
+
+      def with_bucket
+        storage = Fog::Storage.new({ provider: 'AWS' })
+        if directory = storage.directories.detect{ |d| d.key == s3_bucket_name }
+          yield(directory)
+        else
+          raise "Couldn't find S3 bucket '#{s3_bucket_name}'"
+        end
+      end
+
+      def step_files
+        FileList.new(File.join(local_path, "*"))
+      end
+
+    end
+  end
+end
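
A sketch of wiring the four tasks into a Rakefile; the template path, bucket, and config values are illustrative, and note that Tasks::Definition and Tasks::Graph also call definition.base_filename when naming their output files:

# Rakefile (hypothetical)
require 'pipely/build'
require 'pipely/tasks'

definition = Pipely::Build.build_definition(
  Pipely::Build::Template.new(File.read('definition.json.erb')),
  ENV['TARGET_ENV'] || 'staging',
  'config/pipeline.yml'
)

Pipely::Tasks::Definition.new { |t| t.definition = definition }
Pipely::Tasks::Graph.new { |t| t.definition = definition }

# Tasks::Deploy invokes the "upload_steps" task before deploying, so define it:
Pipely::Tasks::UploadSteps.new do |t|
  t.s3_bucket_name = 'step-bucket'
  t.s3_path = 'staging/my_pipeline'
end

Pipely::Tasks::Deploy.new { |t| t.definition = definition }
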
data/lib/pipely/version.rb
CHANGED

data/spec/lib/pipely/build/daily_scheduler_spec.rb
ADDED
@@ -0,0 +1,33 @@
+require 'pipely/build/daily_scheduler'
+
+describe Pipely::Build::DailyScheduler do
+
+  let(:start_time) { "11:00:00" }
+
+  subject { described_class.new(start_time) }
+
+  describe "#period" do
+    it "is '24 hours'" do
+      expect(subject.period).to eq('24 hours')
+    end
+  end
+
+  describe "#start_date_time" do
+    context "if the start time has already happened today in UTC" do
+      it "chooses the start time tomorrow" do
+        Timecop.freeze(Time.utc(2013, 6, 12, 16, 12, 30)) do
+          expect(subject.start_date_time).to eq("2013-06-13T11:00:00")
+        end
+      end
+    end
+
+    context "if the start time has not happened yet today in UTC" do
+      it "chooses the start time today" do
+        Timecop.freeze(Time.utc(2013, 6, 13, 4, 12, 30)) do
+          expect(subject.start_date_time).to eq("2013-06-13T11:00:00")
+        end
+      end
+    end
+  end
+
+end

data/spec/lib/pipely/build/right_now_scheduler_spec.rb
ADDED
@@ -0,0 +1,19 @@
+require 'pipely/build/right_now_scheduler'
+
+describe Pipely::Build::RightNowScheduler do
+
+  describe "#period" do
+    it "is '12 hours'" do
+      expect(subject.period).to eq('12 hours')
+    end
+  end
+
+  describe "#start_date_time" do
+    it "chooses the current time as the start time" do
+      Timecop.freeze(Time.utc(2013, 6, 12, 16, 12, 30)) do
+        expect(subject.start_date_time).to eq("2013-06-12T16:12:30")
+      end
+    end
+  end
+
+end

data/spec/lib/pipely/build/s3_path_builder_spec.rb
ADDED
@@ -0,0 +1,46 @@
+require 'pipely/build/s3_path_builder'
+
+describe Pipely::Build::S3PathBuilder do
+
+  subject {
+    described_class.new(
+      logs: 'log-bucket',
+      steps: 'step-bucket',
+      assets: 'asset-bucket',
+      prefix: 'run-prefix',
+    )
+  }
+
+  its(:s3_log_prefix) {
+    should eq("s3://log-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
+  }
+
+  its(:s3_step_prefix) {
+    should eq("s3://step-bucket/run-prefix")
+  }
+
+  its(:s3n_step_prefix) {
+    should eq("s3n://step-bucket/run-prefix")
+  }
+
+  its(:s3_asset_prefix) {
+    should eq("s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
+  }
+
+  its(:s3n_asset_prefix) {
+    should eq("s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
+  }
+
+  describe "#to_hash" do
+    it 'includes the necessary keys for supplying config to a Template' do
+      expect(subject.to_hash.keys).to match_array([
+        :s3_log_prefix,
+        :s3_step_prefix,
+        :s3n_step_prefix,
+        :s3_asset_prefix,
+        :s3n_asset_prefix,
+      ])
+    end
+  end
+
+end

data/spec/lib/pipely/build/template_spec.rb
ADDED
@@ -0,0 +1,85 @@
+require 'pipely/build/template'
+
+describe Pipely::Build::Template do
+  let(:source) { "some test json <%= foo %>" }
+
+  subject { described_class.new(source) }
+
+  context 'given some configuration' do
+    let(:foo) { 'asdfgwrytqfadfa' }
+    let(:expected_json) { "some test json #{foo}" }
+
+    before do
+      subject.apply_config({ foo: foo })
+    end
+
+    its(:to_json) { should eq(expected_json) }
+  end
+
+  describe "#streaming_hadoop_step(options)" do
+    let(:s3_path_builder) {
+      Pipely::Build::S3PathBuilder.new(
+        logs: 'log-bucket',
+        steps: 'step-bucket',
+        assets: 'asset-bucket',
+        prefix: 'run-prefix'
+      )
+    }
+
+    before do
+      subject.apply_config(s3_path_builder.to_hash)
+    end
+
+    it "builds a streaming hadoop step" do
+      step = subject.streaming_hadoop_step(
+        :input => '/input_dir/',
+        :output => '/output_dir/',
+        :mapper => '/mapper.rb',
+        :reducer => '/reducer.rb'
+      )
+
+      expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,s3n://step-bucket/run-prefix/reducer.rb")
+    end
+
+    context "given an array of inputs" do
+      it 'points to the IdentityReducer correctly (not as an S3 URL)' do
+        step = subject.streaming_hadoop_step(
+          :input => ['/input_dir/', '/input_dir2/'],
+          :output => '/output_dir/',
+          :mapper => '/mapper.rb',
+          :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
+        )
+
+        expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir2/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
+      end
+    end
+
+    context "given a cacheFile" do
+      it 'points to the IdentityReducer correctly (not as an S3 URL)' do
+        step = subject.streaming_hadoop_step(
+          :input => '/input_dir/',
+          :output => '/output_dir/',
+          :cache_file => '/cache_file#cache_file',
+          :mapper => '/mapper.rb',
+          :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
+        )
+
+        expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer,-cacheFile,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/cache_file#cache_file")
+      end
+    end
+
+    context "given the IdentityReducer" do
+      it 'points to the IdentityReducer correctly (not as an S3 URL)' do
+        step = subject.streaming_hadoop_step(
+          :input => '/input_dir/',
+          :output => '/output_dir/',
+          :mapper => '/mapper.rb',
+          :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
+        )
+
+        expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
+      end
+    end
+  end
+
+end

data/spec/lib/pipely/deploy/client_spec.rb
ADDED
@@ -0,0 +1,28 @@
+require 'spec_helper'
+require 'pipely/deploy'
+
+describe Pipely::Deploy::Client do
+
+  describe "#deploy_pipeline" do
+    let(:existing_pipeline_ids) { ["pipeline-one", "pipeline-two"] }
+    let(:new_pipeline_id) { "pipeline-three" }
+    let(:pipeline_name) { "MyPipeline" }
+    let(:definition) { "pipeline json" }
+
+    it "gets a list of pipelines, creates a new one, and deletes the others" do
+      subject.should_receive(:existing_pipelines).
+        and_return(existing_pipeline_ids)
+
+      subject.should_receive(:create_pipeline).
+        with(pipeline_name, anything()).
+        and_return(new_pipeline_id)
+
+      existing_pipeline_ids.each do |id|
+        subject.should_receive(:delete_pipeline).with(id)
+      end
+
+      subject.deploy_pipeline(pipeline_name, definition)
+    end
+  end
+
+end
data/spec/spec_helper.rb
ADDED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: pipely
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.2.0
 platform: ruby
 authors:
 - Matt Gillooly
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-12-
+date: 2013-12-31 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ruby-graphviz
@@ -58,14 +58,14 @@ dependencies:
     requirements:
     - - ~>
      - !ruby/object:Gem::Version
-        version: 1.
+        version: 1.19.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
    - - ~>
      - !ruby/object:Gem::Version
-        version: 1.
+        version: 1.19.0
 - !ruby/object:Gem::Dependency
   name: unf
   requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,34 @@ dependencies:
     - - '>='
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: uuidtools
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: activesupport
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement
@@ -108,6 +136,20 @@ dependencies:
     - - '>='
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: timecop
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 description:
 email:
 - matt@swipely.com
@@ -116,24 +158,46 @@ executables:
 extensions: []
 extra_rdoc_files: []
 files:
+- lib/pipely/build/daily_scheduler.rb
+- lib/pipely/build/definition.rb
+- lib/pipely/build/environment_config.rb
+- lib/pipely/build/right_now_scheduler.rb
+- lib/pipely/build/s3_path_builder.rb
+- lib/pipely/build/template.rb
+- lib/pipely/build/template_helpers.rb
+- lib/pipely/build.rb
 - lib/pipely/component.rb
 - lib/pipely/definition.rb
 - lib/pipely/dependency.rb
+- lib/pipely/deploy/client.rb
+- lib/pipely/deploy.rb
 - lib/pipely/fog_client.rb
 - lib/pipely/graph_builder.rb
 - lib/pipely/live_pipeline.rb
 - lib/pipely/reference_list.rb
 - lib/pipely/runs_report.rb
+- lib/pipely/tasks/definition.rb
+- lib/pipely/tasks/deploy.rb
+- lib/pipely/tasks/graph.rb
+- lib/pipely/tasks/upload_steps.rb
+- lib/pipely/tasks.rb
 - lib/pipely/version.rb
 - lib/pipely.rb
 - Rakefile
 - README.md
+- spec/lib/pipely/build/daily_scheduler_spec.rb
+- spec/lib/pipely/build/right_now_scheduler_spec.rb
+- spec/lib/pipely/build/s3_path_builder_spec.rb
+- spec/lib/pipely/build/template_spec.rb
+- spec/lib/pipely/build_spec.rb
 - spec/lib/pipely/component_spec.rb
 - spec/lib/pipely/definition_spec.rb
 - spec/lib/pipely/dependency_spec.rb
+- spec/lib/pipely/deploy/client_spec.rb
 - spec/lib/pipely/graph_builder_spec.rb
 - spec/lib/pipely/reference_list_spec.rb
 - spec/lib/pipely_spec.rb
+- spec/spec_helper.rb
 - bin/pipely
 homepage: http://github.com/swipely/pipely
 licenses:
@@ -160,9 +224,16 @@ signing_key:
 specification_version: 4
 summary: Generate dependency graphs from pipeline definitions.
 test_files:
+- spec/lib/pipely/build/daily_scheduler_spec.rb
+- spec/lib/pipely/build/right_now_scheduler_spec.rb
+- spec/lib/pipely/build/s3_path_builder_spec.rb
+- spec/lib/pipely/build/template_spec.rb
+- spec/lib/pipely/build_spec.rb
 - spec/lib/pipely/component_spec.rb
 - spec/lib/pipely/definition_spec.rb
 - spec/lib/pipely/dependency_spec.rb
+- spec/lib/pipely/deploy/client_spec.rb
 - spec/lib/pipely/graph_builder_spec.rb
 - spec/lib/pipely/reference_list_spec.rb
 - spec/lib/pipely_spec.rb
+- spec/spec_helper.rb