pipely 0.1.5 → 0.2.0
This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/lib/pipely/build.rb +31 -0
- data/lib/pipely/build/daily_scheduler.rb +37 -0
- data/lib/pipely/build/definition.rb +26 -0
- data/lib/pipely/build/environment_config.rb +34 -0
- data/lib/pipely/build/right_now_scheduler.rb +27 -0
- data/lib/pipely/build/s3_path_builder.rb +52 -0
- data/lib/pipely/build/template.rb +43 -0
- data/lib/pipely/build/template_helpers.rb +53 -0
- data/lib/pipely/deploy.rb +1 -0
- data/lib/pipely/deploy/client.rb +79 -0
- data/lib/pipely/tasks.rb +9 -0
- data/lib/pipely/tasks/definition.rb +69 -0
- data/lib/pipely/tasks/deploy.rb +58 -0
- data/lib/pipely/tasks/graph.rb +75 -0
- data/lib/pipely/tasks/upload_steps.rb +86 -0
- data/lib/pipely/version.rb +1 -1
- data/spec/lib/pipely/build/daily_scheduler_spec.rb +33 -0
- data/spec/lib/pipely/build/right_now_scheduler_spec.rb +19 -0
- data/spec/lib/pipely/build/s3_path_builder_spec.rb +46 -0
- data/spec/lib/pipely/build/template_spec.rb +85 -0
- data/spec/lib/pipely/build_spec.rb +3 -0
- data/spec/lib/pipely/deploy/client_spec.rb +28 -0
- data/spec/spec_helper.rb +9 -0
- metadata +75 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e753a1c8b9207a58a13e7f4454b4d777ee2b42ba
+  data.tar.gz: af3d1bbe539a028bcbc58c5e6f1ca2df908f6a41
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 56fdf912f37ebb81a7f17a7a8a6c5686228165045d5917a48f12621b4a83357b7bb73fcb0e3562c0a0df9f53ec8d5131d484056d0f8283a819b1f04daf0c46f9
+  data.tar.gz: 6442124a52915c429e9a2e0ada617b687c10c5db887b2c84f1b77f3a96ec1e4e7a0cd789240e594f82807ab5821ebf1f973960654a35a7f29c7375cc4d2d1708
data/lib/pipely/build.rb
ADDED
@@ -0,0 +1,31 @@
+require 'pipely/build/definition'
+require 'pipely/build/template'
+require 'pipely/build/daily_scheduler'
+require 'pipely/build/right_now_scheduler'
+require 'pipely/build/s3_path_builder'
+require 'pipely/build/environment_config'
+
+module Pipely
+
+  # Turn Templates+config into a deployable Definition.
+  #
+  module Build
+
+    def self.build_definition(template, environment, config_path)
+      env = environment.to_sym
+      config = EnvironmentConfig.load(config_path, env)
+
+      case environment.to_sym
+      when :production
+        s3_prefix = "production/#{config[:namespace]}"
+        scheduler = DailyScheduler.new
+      when :staging
+        s3_prefix = "staging/#{`whoami`.strip}/#{config[:namespace]}"
+        scheduler = RightNowScheduler.new
+      end
+
+      Definition.new(template, env, s3_prefix, scheduler, config)
+    end
+
+  end
+end
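For orientation, a minimal sketch of how this entry point might be driven; the template file, config path, and YAML contents are hypothetical (a config shape that satisfies Definition is sketched under environment_config.rb below):

require 'pipely/build'

# Hypothetical inputs; build_definition picks the S3 prefix and scheduler
# from the environment name (:production or :staging).
template = Pipely::Build::Template.new(File.read('templates/pipeline.json.erb'))
definition = Pipely::Build.build_definition(template, 'staging', 'config/pipeline.yml')

definition.pipeline_name  # => config[:name]
definition.to_json        # rendered pipeline definition JSON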
data/lib/pipely/build/daily_scheduler.rb
ADDED
@@ -0,0 +1,37 @@
+module Pipely
+  module Build
+
+    # Compute schedule attributes for a pipeline that runs once-a-day at a set
+    # time.
+    #
+    class DailyScheduler
+
+      def initialize(start_time="11:00:00")
+        @start_time = start_time
+      end
+
+      def period
+        '24 hours'
+      end
+
+      def start_date_time
+        date = Date.today
+
+        # if start_time already happened today, wait for tomorrow's start_time
+        now_time = Time.now.utc.strftime('%H:%M:%S')
+        date += 1 if now_time >= @start_time
+
+        date.strftime("%Y-%m-%dT#{@start_time}")
+      end
+
+      def to_hash
+        {
+          :period => period,
+          :start_date_time => start_date_time
+        }
+      end
+
+    end
+
+  end
+end
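A quick illustration of the rollover logic in start_date_time above, assuming the default start time; actual output depends on the UTC clock at call time:

require 'date'
require 'pipely/build/daily_scheduler'

scheduler = Pipely::Build::DailyScheduler.new  # default start_time "11:00:00"

scheduler.period           # => "24 hours"
# String comparison of HH:MM:SS stamps: before 11:00 UTC this is today's
# date; at or after 11:00 UTC it rolls over to tomorrow.
scheduler.start_date_time  # e.g. "2013-12-31T11:00:00"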
data/lib/pipely/build/definition.rb
ADDED
@@ -0,0 +1,26 @@
+module Pipely
+  module Build
+
+    # Represent a pipeline definition, built from a Template and some config.
+    #
+    class Definition < Struct.new(:template,:env,:s3_prefix,:scheduler,:config)
+      def pipeline_name
+        config[:name]
+      end
+
+      def s3_path_builder
+        S3PathBuilder.new(config[:s3].merge(prefix: s3_prefix))
+      end
+
+      def to_json
+        template.apply_config(:environment => env)
+        template.apply_config(config)
+        template.apply_config(s3_path_builder.to_hash)
+        template.apply_config(scheduler.to_hash)
+
+        template.to_json
+      end
+    end
+
+  end
+end
data/lib/pipely/build/environment_config.rb
ADDED
@@ -0,0 +1,34 @@
+require 'yaml'
+
+module Pipely
+  module Build
+
+    # Work with YAML config files that contain parallel configs for various
+    # environments.
+    #
+    class EnvironmentConfig < Hash
+
+      def self.load(filename, environment)
+        raw = YAML.load_file(filename)[environment.to_s]
+        load_from_hash(raw)
+      end
+
+      def self.load_from_hash(attributes)
+        config = new
+
+        attributes.each do |k, v|
+          case v
+          when Hash
+            config[k.to_sym] = load_from_hash(v)
+          else
+            config[k.to_sym] = v.clone
+          end
+        end
+
+        config
+      end
+
+    end
+
+  end
+end
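A sketch of the parallel-environment YAML that EnvironmentConfig.load expects; the file contents here are hypothetical, and load_from_hash symbolizes keys recursively:

require 'pipely/build/environment_config'

# config/pipeline.yml (hypothetical):
#   production:
#     name: MyPipeline
#     namespace: my_pipeline
#     s3: { assets: prod-asset-bucket, logs: prod-log-bucket, steps: prod-step-bucket }
#   staging:
#     name: MyPipelineStaging
#     namespace: my_pipeline
#     s3: { assets: staging-asset-bucket, logs: staging-log-bucket, steps: staging-step-bucket }
config = Pipely::Build::EnvironmentConfig.load('config/pipeline.yml', :staging)

config[:name]         # => "MyPipelineStaging"
config[:s3][:assets]  # => "staging-asset-bucket"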
data/lib/pipely/build/right_now_scheduler.rb
ADDED
@@ -0,0 +1,27 @@
+module Pipely
+  module Build
+
+    # Compute schedule attributes for a pipeline that should run immediately
+    # after being deployed.
+    #
+    class RightNowScheduler
+
+      def period
+        '12 hours'
+      end
+
+      def start_date_time
+        Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S")
+      end
+
+      def to_hash
+        {
+          :period => period,
+          :start_date_time => start_date_time
+        }
+      end
+
+    end
+
+  end
+end
data/lib/pipely/build/s3_path_builder.rb
ADDED
@@ -0,0 +1,52 @@
+module Pipely
+  module Build
+
+    # Builds paths to assets, logs, and steps that are on S3.
+    #
+    class S3PathBuilder
+
+      attr_reader :assets_bucket, :logs_bucket, :steps_bucket
+
+      START_TIME = "\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}"
+
+      def initialize(options)
+        @assets_bucket = options[:assets]
+        @logs_bucket = options[:logs]
+        @steps_bucket = options[:steps]
+        @s3prefix = options[:prefix]
+      end
+
+      def s3_log_prefix
+        "s3://#{@logs_bucket}/#{@s3prefix}/#{START_TIME}"
+      end
+
+      def s3_step_prefix
+        "s3://#{@steps_bucket}/#{@s3prefix}"
+      end
+
+      def s3n_step_prefix
+        "s3n://#{@steps_bucket}/#{@s3prefix}"
+      end
+
+      def s3_asset_prefix
+        "s3://#{@assets_bucket}/#{@s3prefix}/#{START_TIME}"
+      end
+
+      def s3n_asset_prefix
+        "s3n://#{@assets_bucket}/#{@s3prefix}/#{START_TIME}"
+      end
+
+      def to_hash
+        {
+          :s3_log_prefix => s3_log_prefix,
+          :s3_step_prefix => s3_step_prefix,
+          :s3n_step_prefix => s3n_step_prefix,
+          :s3_asset_prefix => s3_asset_prefix,
+          :s3n_asset_prefix => s3n_asset_prefix
+        }
+      end
+
+    end
+
+  end
+end
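Note the escaped interpolation in START_TIME: the backslash keeps the expression out of Ruby's hands, so the literal #{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')} survives into the rendered JSON for AWS Data Pipeline to evaluate at run time. A sketch with hypothetical bucket names:

require 'pipely/build/s3_path_builder'

builder = Pipely::Build::S3PathBuilder.new(
  assets: 'asset-bucket',
  logs: 'log-bucket',
  steps: 'step-bucket',
  prefix: 'production/my_pipeline'
)

builder.s3_step_prefix
# => "s3://step-bucket/production/my_pipeline"
builder.s3_log_prefix
# => "s3://log-bucket/production/my_pipeline/#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}"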
data/lib/pipely/build/template.rb
ADDED
@@ -0,0 +1,43 @@
+require 'active_support/core_ext/hash'
+require 'erb'
+
+require 'pipely/build/template_helpers'
+
+module Pipely
+  module Build
+
+    # An ERB template that can be interpolated with config hashes to render a
+    # deployable pipeline definition.
+    #
+    class Template
+      include TemplateHelpers
+
+      def initialize(source)
+        @source = source
+        @config = {}
+      end
+
+      def apply_config(attributes)
+        @config.merge!(attributes.symbolize_keys)
+      end
+
+      def to_json
+        ERB.new(@source).result(binding)
+      end
+
+      def respond_to_missing(method_name, include_private=false)
+        @config.keys.include?(method_name.to_s) || super
+      end
+
+      def method_missing(method_name, *args, &block)
+        if @config.keys.include?(method_name)
+          @config[method_name]
+        else
+          super
+        end
+      end
+
+    end
+
+  end
+end
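A minimal sketch of the render cycle (the JSON source is hypothetical): keys merged in via apply_config are symbolized, and method_missing exposes them to the ERB binding by name.

require 'pipely/build/template'

template = Pipely::Build::Template.new('{ "name": "<%= name %>" }')
template.apply_config('name' => 'MyPipeline')  # string keys become symbols

template.to_json  # => '{ "name": "MyPipeline" }'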
data/lib/pipely/build/template_helpers.rb
ADDED
@@ -0,0 +1,53 @@
+module Pipely
+  module Build
+
+    # Helper methods used by ERB templates.
+    #
+    module TemplateHelpers
+
+      def s3_asset_path(path)
+        "#{s3_asset_prefix if '/' == path[0]}#{path}"
+      end
+
+      def s3n_asset_path(path)
+        "#{s3n_asset_prefix if '/' == path[0]}#{path}"
+      end
+
+      def s3n_step_path(path)
+        "#{s3n_step_prefix if '/' == path[0]}#{path}"
+      end
+
+      def streaming_hadoop_step(options)
+        parts = [ '/home/hadoop/contrib/streaming/hadoop-streaming.jar' ]
+
+        Array(options[:input]).each do |input|
+          parts += [ '-input', s3n_asset_path(input) ]
+        end
+
+        Array(options[:output]).each do |output|
+          parts += ['-output', s3_asset_path(output) ]
+        end
+
+        Array(options[:mapper]).each do |mapper|
+          parts += ['-mapper', s3n_step_path(mapper) ]
+        end
+
+        Array(options[:reducer]).each do |reducer|
+          parts += ['-reducer', s3n_step_path(reducer) ]
+        end
+
+        Array(options[:cache_file]).each do |cache_file|
+          parts += ['-cacheFile', s3n_asset_path(cache_file)]
+        end
+
+        (options[:env] || {}).each do |name, value|
+          parts += ['-cmdenv', "#{name}=#{value}"]
+        end
+
+        parts.join(',')
+      end
+
+    end
+
+  end
+end
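One subtlety worth a sketch: the *_path helpers prepend a prefix only when the path starts with '/', so bare values such as a Java class name pass through untouched (the template spec below exercises this). Config values here are hypothetical:

require 'pipely/build/template'
require 'pipely/build/s3_path_builder'

template = Pipely::Build::Template.new('')
template.apply_config(
  Pipely::Build::S3PathBuilder.new(
    assets: 'asset-bucket', logs: 'log-bucket',
    steps: 'step-bucket', prefix: 'run-prefix'
  ).to_hash
)

template.s3n_step_path('/mapper.rb')
# => "s3n://step-bucket/run-prefix/mapper.rb"
template.s3n_step_path('org.apache.hadoop.mapred.lib.IdentityReducer')
# => "org.apache.hadoop.mapred.lib.IdentityReducer" (no prefix added)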
data/lib/pipely/deploy.rb
ADDED
@@ -0,0 +1 @@
+require 'pipely/deploy/client'
data/lib/pipely/deploy/client.rb
ADDED
@@ -0,0 +1,79 @@
+require 'fog'
+require 'logger'
+require 'tempfile'
+require 'uuidtools'
+
+module Pipely
+  module Deploy
+
+    # Client for managing deployment of rendered definitions.
+    #
+    class Client
+
+      # Generic error representing failure to deploy a rendered definition.
+      class PipelineDeployerError < RuntimeError; end
+
+      def initialize(log=nil)
+        @log = log || Logger.new(STDOUT)
+        @data_pipelines = Fog::AWS::DataPipeline.new
+      end
+
+      def deploy_pipeline(pipeline_name, definition)
+        # Get a list of all existing pipelines
+        pipeline_ids = existing_pipelines(pipeline_name)
+        @log.info("#{pipeline_ids.count} existing pipelines: #{pipeline_ids}")
+
+        # Create new pipeline
+        created_pipeline_id = create_pipeline(pipeline_name, definition)
+        @log.info("Created pipeline id '#{created_pipeline_id}'")
+
+        # Delete old pipelines
+        pipeline_ids.each do |pipeline_id|
+          begin
+            delete_pipeline(pipeline_id)
+            @log.info("Deleted pipeline '#{pipeline_id}'")
+
+          rescue PipelineDeployerError => error
+            @log.warn(error)
+          end
+        end
+      end
+
+      def existing_pipelines(pipeline_name)
+        ids = []
+
+        begin
+          result = Fog::AWS[:data_pipeline].list_pipelines
+
+          ids += result['pipelineIdList'].
+            select { |p| p['name'] == pipeline_name }.
+            map { |p| p['id'] }
+
+        end while (result['hasMoreResults'] && result['marker'])
+
+        ids
+      end
+
+      def create_pipeline(pipeline_name, definition)
+        definition_objects = JSON.parse(definition)['objects']
+
+        unique_id = UUIDTools::UUID.random_create
+
+        created_pipeline = @data_pipelines.pipelines.create(
+          unique_id: unique_id,
+          name: pipeline_name
+        )
+
+        created_pipeline.put(definition_objects)
+        created_pipeline.activate
+
+        created_pipeline.id
+      end
+
+      def delete_pipeline(pipeline_id)
+        @data_pipelines.pipelines.get(pipeline_id).destroy
+      end
+
+    end
+  end
+end
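Driving the client directly might look like this; it assumes AWS credentials are already configured for fog, and the pipeline name and definition file are hypothetical:

require 'pipely/deploy'

client = Pipely::Deploy::Client.new
client.deploy_pipeline('MyPipeline', File.read('definitions/my_pipeline.json'))
# Logs to STDOUT: lists existing pipelines with this name, creates and
# activates the replacement, then deletes the old ones.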
data/lib/pipely/tasks/definition.rb
ADDED
@@ -0,0 +1,69 @@
+require 'rake'
+require 'rake/tasklib'
+require 'pipely'
+
+module Pipely
+  module Tasks
+    class Definition < ::Rake::TaskLib
+      include ::Rake::DSL if defined?(::Rake::DSL)
+
+      # Name of task.
+      #
+      # default:
+      #   :definition
+      attr_accessor :name
+
+      # Path where rendered definitions are written.
+      #
+      # default:
+      #   "definitions"
+      attr_accessor :path
+
+      # Pipeline definition instance
+      attr_accessor :definition
+
+      # Use verbose output. If this is set to true, the task will print the
+      # local and remote paths of each step file it uploads to S3.
+      #
+      # default:
+      #   true
+      attr_accessor :verbose
+
+      def initialize(*args, &task_block)
+        setup_ivars(args)
+
+        directory path
+
+        desc "Graphs the full pipeline definition using Graphviz"
+        task name => path do |_, task_args|
+          RakeFileUtils.send(:verbose, verbose) do
+            if task_block
+              task_block.call(*[self, task_args].slice(0, task_block.arity))
+            end
+
+            run_task verbose
+          end
+        end
+      end
+
+      def setup_ivars(args)
+        @name = args.shift || :definition
+        @verbose = true
+        @path = "definitions"
+      end
+
+      def run_task(verbose)
+        puts "Generating #{target_filename}" if verbose
+
+        File.open(target_filename, 'w') do |file|
+          file.write(definition.to_json)
+        end
+      end
+
+      def target_filename
+        "#{path}/#{definition.base_filename}.json"
+      end
+
+    end
+  end
+end
data/lib/pipely/tasks/deploy.rb
ADDED
@@ -0,0 +1,58 @@
+require 'rake'
+require 'rake/tasklib'
+require 'pipely/deploy'
+
+module Pipely
+  module Tasks
+    class Deploy < ::Rake::TaskLib
+      include ::Rake::DSL if defined?(::Rake::DSL)
+
+      # Name of task.
+      #
+      # default:
+      #   :deploy
+      attr_accessor :name
+
+      # Pipeline definition instance
+      attr_accessor :definition
+
+      # Use verbose output. If this is set to true, the task will print the
+      # local and remote paths of each step file it uploads to S3.
+      #
+      # default:
+      #   true
+      attr_accessor :verbose
+
+      def initialize(*args, &task_block)
+        setup_ivars(args)
+
+        desc "Deploy pipeline" unless ::Rake.application.last_comment
+
+        task name, *args do |_, task_args|
+          RakeFileUtils.send(:verbose, verbose) do
+            if task_block
+              task_block.call(*[self, task_args].slice(0, task_block.arity))
+            end
+
+            run_task verbose
+          end
+        end
+      end
+
+      def setup_ivars(args)
+        @name = args.shift || :deploy
+        @verbose = true
+      end
+
+      def run_task(verbose)
+        Rake::Task["upload_steps"].invoke
+
+        Pipely::Deploy::Client.new.deploy_pipeline(
+          definition.pipeline_name,
+          definition.to_json
+        )
+      end
+
+    end
+  end
+end
data/lib/pipely/tasks/graph.rb
ADDED
@@ -0,0 +1,75 @@
+require 'rake'
+require 'rake/tasklib'
+require 'pipely'
+
+module Pipely
+  module Tasks
+    class Graph < ::Rake::TaskLib
+      include ::Rake::DSL if defined?(::Rake::DSL)
+
+      # Name of task.
+      #
+      # default:
+      #   :graph
+      attr_accessor :name
+
+      # Path to write graph images to.
+      #
+      # default:
+      #   "graphs"
+      attr_accessor :path
+
+      # Pipeline definition instance
+      attr_accessor :definition
+
+      # Use verbose output. If this is set to true, the task will print the
+      # local and remote paths of each step file it uploads to S3.
+      #
+      # default:
+      #   true
+      attr_accessor :verbose
+
+      def initialize(*args, &task_block)
+        setup_ivars(args)
+
+        # create the `path` directory if it doesn't exist
+        directory path
+
+        namespace name do
+          task :full => path do |_, task_args|
+            RakeFileUtils.send(:verbose, verbose) do
+              if task_block
+                task_block.call(*[self, task_args].slice(0, task_block.arity))
+              end
+
+              run_task verbose
+            end
+          end
+
+          task :open => :full do
+            `open #{target_filename}`
+          end
+        end
+
+        desc "Graphs the full pipeline definition using Graphviz"
+        task name => "#{name}:full"
+      end
+
+      def setup_ivars(args)
+        @name = args.shift || :graph
+        @verbose = true
+        @path = "graphs"
+      end
+
+      def run_task(verbose)
+        puts "Generating #{target_filename}" if verbose
+        Pipely.draw(definition.to_json, target_filename)
+      end
+
+      def target_filename
+        "#{path}/#{definition.base_filename}.png"
+      end
+
+    end
+  end
+end
data/lib/pipely/tasks/upload_steps.rb
ADDED
@@ -0,0 +1,86 @@
+require 'rake'
+require 'rake/tasklib'
+require 'fog'
+
+module Pipely
+  module Tasks
+    class UploadSteps < ::Rake::TaskLib
+      include ::Rake::DSL if defined?(::Rake::DSL)
+
+      # Name of task.
+      #
+      # default:
+      #   :upload_steps
+      attr_accessor :name
+
+      # Local path to where the step files are.
+      #
+      # default:
+      #   "steps"
+      attr_accessor :local_path
+
+      # Name of S3 bucket to upload steps to.
+      attr_accessor :s3_bucket_name
+
+      # Path within S3 bucket to upload steps to.
+      attr_accessor :s3_path
+
+      # Use verbose output. If this is set to true, the task will print the
+      # local and remote paths of each step file it uploads to S3.
+      #
+      # default:
+      #   true
+      attr_accessor :verbose
+
+      def initialize(*args, &task_block)
+        setup_ivars(args)
+
+        unless ::Rake.application.last_comment
+          desc "Upload Data Pipeline steps to S3"
+        end
+
+        task name, *args do |_, task_args|
+          RakeFileUtils.send(:verbose, verbose) do
+            if task_block
+              task_block.call(*[self, task_args].slice(0, task_block.arity))
+            end
+
+            run_task verbose
+          end
+        end
+      end
+
+      def setup_ivars(args)
+        @name = args.shift || :upload_steps
+        @verbose = true
+        @local_path = "steps"
+      end
+
+      def run_task(verbose)
+        with_bucket do |directory|
+          step_files.each do |file_name|
+            dest = "#{s3_path}/#{File.basename(file_name)}"
+            puts "uploading #{dest}" if verbose
+            directory.files.create(key: dest, body: File.read(file_name))
+          end
+        end
+      end
+
+      private
+
+      def with_bucket
+        storage = Fog::Storage.new({ provider: 'AWS' })
+        if directory = storage.directories.detect{ |d| d.key == s3_bucket_name }
+          yield(directory)
+        else
+          raise "Couldn't find S3 bucket '#{s3_bucket_name}'"
+        end
+      end
+
+      def step_files
+        FileList.new(File.join(local_path, "*"))
+      end
+
+    end
+  end
+end
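Taken together, the task classes suggest a Rakefile wiring along these lines. Everything here (template, config, bucket, prefix) is hypothetical, and note that the Definition and Graph tasks call definition.base_filename, which Build::Definition does not define in this diff, so a real setup would have to supply it:

# Rakefile (sketch)
require 'pipely/build'
require 'pipely/tasks/definition'
require 'pipely/tasks/deploy'
require 'pipely/tasks/graph'
require 'pipely/tasks/upload_steps'

definition = Pipely::Build.build_definition(
  Pipely::Build::Template.new(File.read('templates/pipeline.json.erb')),
  ENV['TARGET'] || 'staging',
  'config/pipeline.yml'
)

Pipely::Tasks::Definition.new { |t| t.definition = definition }
Pipely::Tasks::Graph.new { |t| t.definition = definition }
Pipely::Tasks::Deploy.new { |t| t.definition = definition }  # invokes upload_steps

Pipely::Tasks::UploadSteps.new do |t|
  t.s3_bucket_name = 'step-bucket'
  t.s3_path = 'staging/my_pipeline'
end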
data/lib/pipely/version.rb
CHANGED
@@ -1,3 +1,3 @@
 module Pipely
-  VERSION = "0.1.5"
+  VERSION = "0.2.0"
 end
data/spec/lib/pipely/build/daily_scheduler_spec.rb
ADDED
@@ -0,0 +1,33 @@
+require 'pipely/build/daily_scheduler'
+
+describe Pipely::Build::DailyScheduler do
+
+  let(:start_time) { "11:00:00" }
+
+  subject { described_class.new(start_time) }
+
+  describe "#period" do
+    it "is '24 hours'" do
+      expect(subject.period).to eq('24 hours')
+    end
+  end
+
+  describe "#start_date_time" do
+    context "if the start time has already happened today in UTC" do
+      it "chooses the start time tomorrow" do
+        Timecop.freeze(Time.utc(2013, 6, 12, 16, 12, 30)) do
+          expect(subject.start_date_time).to eq("2013-06-13T11:00:00")
+        end
+      end
+    end
+
+    context "if the start time has not happened yet today in UTC" do
+      it "chooses the start time today" do
+        Timecop.freeze(Time.utc(2013, 6, 13, 4, 12, 30)) do
+          expect(subject.start_date_time).to eq("2013-06-13T11:00:00")
+        end
+      end
+    end
+  end
+
+end
data/spec/lib/pipely/build/right_now_scheduler_spec.rb
ADDED
@@ -0,0 +1,19 @@
+require 'pipely/build/right_now_scheduler'
+
+describe Pipely::Build::RightNowScheduler do
+
+  describe "#period" do
+    it "is '12 hours'" do
+      expect(subject.period).to eq('12 hours')
+    end
+  end
+
+  describe "#start_date_time" do
+    it "chooses the current time as the start time" do
+      Timecop.freeze(Time.utc(2013, 6, 12, 16, 12, 30)) do
+        expect(subject.start_date_time).to eq("2013-06-12T16:12:30")
+      end
+    end
+  end
+
+end
data/spec/lib/pipely/build/s3_path_builder_spec.rb
ADDED
@@ -0,0 +1,46 @@
+require 'pipely/build/s3_path_builder'
+
+describe Pipely::Build::S3PathBuilder do
+
+  subject {
+    described_class.new(
+      logs: 'log-bucket',
+      steps: 'step-bucket',
+      assets: 'asset-bucket',
+      prefix: 'run-prefix',
+    )
+  }
+
+  its(:s3_log_prefix) {
+    should eq("s3://log-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
+  }
+
+  its(:s3_step_prefix) {
+    should eq("s3://step-bucket/run-prefix")
+  }
+
+  its(:s3n_step_prefix) {
+    should eq("s3n://step-bucket/run-prefix")
+  }
+
+  its(:s3_asset_prefix) {
+    should eq("s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
+  }
+
+  its(:s3n_asset_prefix) {
+    should eq("s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
+  }
+
+  describe "#to_hash" do
+    it 'includes the necessary keys for supplying config to a Template' do
+      expect(subject.to_hash.keys).to match_array([
+        :s3_log_prefix,
+        :s3_step_prefix,
+        :s3n_step_prefix,
+        :s3_asset_prefix,
+        :s3n_asset_prefix,
+      ])
+    end
+  end
+
+end
data/spec/lib/pipely/build/template_spec.rb
ADDED
@@ -0,0 +1,85 @@
+require 'pipely/build/template'
+
+describe Pipely::Build::Template do
+  let(:source) { "some test json <%= foo %>" }
+
+  subject { described_class.new(source) }
+
+  context 'given some configuration' do
+    let(:foo) { 'asdfgwrytqfadfa' }
+    let(:expected_json) { "some test json #{foo}" }
+
+    before do
+      subject.apply_config({ foo: foo })
+    end
+
+    its(:to_json) { should eq(expected_json) }
+  end
+
+  describe "#streaming_hadoop_step(options)" do
+    let(:s3_path_builder) {
+      Pipely::Build::S3PathBuilder.new(
+        logs: 'log-bucket',
+        steps: 'step-bucket',
+        assets: 'asset-bucket',
+        prefix: 'run-prefix'
+      )
+    }
+
+    before do
+      subject.apply_config(s3_path_builder.to_hash)
+    end
+
+    it "builds a streaming hadoop step" do
+      step = subject.streaming_hadoop_step(
+        :input => '/input_dir/',
+        :output => '/output_dir/',
+        :mapper => '/mapper.rb',
+        :reducer => '/reducer.rb'
+      )
+
+      expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,s3n://step-bucket/run-prefix/reducer.rb")
+    end
+
+    context "given an array of inputs" do
+      it 'points to the IdentityReducer correctly (not as an S3 URL)' do
+        step = subject.streaming_hadoop_step(
+          :input => ['/input_dir/', '/input_dir2/'],
+          :output => '/output_dir/',
+          :mapper => '/mapper.rb',
+          :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
+        )
+
+        expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir2/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
+      end
+    end
+
+    context "given a cacheFile" do
+      it 'points to the IdentityReducer correctly (not as an S3 URL)' do
+        step = subject.streaming_hadoop_step(
+          :input => '/input_dir/',
+          :output => '/output_dir/',
+          :cache_file => '/cache_file#cache_file',
+          :mapper => '/mapper.rb',
+          :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
+        )
+
+        expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer,-cacheFile,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/cache_file#cache_file")
+      end
+    end
+
+    context "given the IdentityReducer" do
+      it 'points to the IdentityReducer correctly (not as an S3 URL)' do
+        step = subject.streaming_hadoop_step(
+          :input => '/input_dir/',
+          :output => '/output_dir/',
+          :mapper => '/mapper.rb',
+          :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
+        )
+
+        expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
+      end
+    end
+  end
+
+end
data/spec/lib/pipely/deploy/client_spec.rb
ADDED
@@ -0,0 +1,28 @@
+require 'spec_helper'
+require 'pipely/deploy'
+
+describe Pipely::Deploy::Client do
+
+  describe "#deploy_pipeline" do
+    let(:existing_pipeline_ids) { ["pipeline-one", "pipeline-two"] }
+    let(:new_pipeline_id) { "pipeline-three" }
+    let(:pipeline_name) { "MyPipeline" }
+    let(:definition) { "pipeline json" }
+
+    it "gets a list of pipelines, creates a new one, and deletes the others" do
+      subject.should_receive(:existing_pipelines).
+        and_return(existing_pipeline_ids)
+
+      subject.should_receive(:create_pipeline).
+        with(pipeline_name, anything()).
+        and_return(new_pipeline_id)
+
+      existing_pipeline_ids.each do |id|
+        subject.should_receive(:delete_pipeline).with(id)
+      end
+
+      subject.deploy_pipeline(pipeline_name, definition)
+    end
+  end
+
+end
data/spec/spec_helper.rb
ADDED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: pipely
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.2.0
 platform: ruby
 authors:
 - Matt Gillooly
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-12-
+date: 2013-12-31 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ruby-graphviz
@@ -58,14 +58,14 @@ dependencies:
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: 1.
+        version: 1.19.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ~>
      - !ruby/object:Gem::Version
-        version: 1.
+        version: 1.19.0
 - !ruby/object:Gem::Dependency
   name: unf
   requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,34 @@ dependencies:
     - - '>='
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: uuidtools
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: activesupport
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement
@@ -108,6 +136,20 @@ dependencies:
     - - '>='
      - !ruby/object:Gem::Version
       version: '0'
+- !ruby/object:Gem::Dependency
+  name: timecop
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 description:
 email:
 - matt@swipely.com
@@ -116,24 +158,46 @@ executables:
 extensions: []
 extra_rdoc_files: []
 files:
+- lib/pipely/build/daily_scheduler.rb
+- lib/pipely/build/definition.rb
+- lib/pipely/build/environment_config.rb
+- lib/pipely/build/right_now_scheduler.rb
+- lib/pipely/build/s3_path_builder.rb
+- lib/pipely/build/template.rb
+- lib/pipely/build/template_helpers.rb
+- lib/pipely/build.rb
 - lib/pipely/component.rb
 - lib/pipely/definition.rb
 - lib/pipely/dependency.rb
+- lib/pipely/deploy/client.rb
+- lib/pipely/deploy.rb
 - lib/pipely/fog_client.rb
 - lib/pipely/graph_builder.rb
 - lib/pipely/live_pipeline.rb
 - lib/pipely/reference_list.rb
 - lib/pipely/runs_report.rb
+- lib/pipely/tasks/definition.rb
+- lib/pipely/tasks/deploy.rb
+- lib/pipely/tasks/graph.rb
+- lib/pipely/tasks/upload_steps.rb
+- lib/pipely/tasks.rb
 - lib/pipely/version.rb
 - lib/pipely.rb
 - Rakefile
 - README.md
+- spec/lib/pipely/build/daily_scheduler_spec.rb
+- spec/lib/pipely/build/right_now_scheduler_spec.rb
+- spec/lib/pipely/build/s3_path_builder_spec.rb
+- spec/lib/pipely/build/template_spec.rb
+- spec/lib/pipely/build_spec.rb
 - spec/lib/pipely/component_spec.rb
 - spec/lib/pipely/definition_spec.rb
 - spec/lib/pipely/dependency_spec.rb
+- spec/lib/pipely/deploy/client_spec.rb
 - spec/lib/pipely/graph_builder_spec.rb
 - spec/lib/pipely/reference_list_spec.rb
 - spec/lib/pipely_spec.rb
+- spec/spec_helper.rb
 - bin/pipely
 homepage: http://github.com/swipely/pipely
 licenses:
@@ -160,9 +224,16 @@ signing_key:
 specification_version: 4
 summary: Generate dependency graphs from pipeline definitions.
 test_files:
+- spec/lib/pipely/build/daily_scheduler_spec.rb
+- spec/lib/pipely/build/right_now_scheduler_spec.rb
+- spec/lib/pipely/build/s3_path_builder_spec.rb
+- spec/lib/pipely/build/template_spec.rb
+- spec/lib/pipely/build_spec.rb
 - spec/lib/pipely/component_spec.rb
 - spec/lib/pipely/definition_spec.rb
 - spec/lib/pipely/dependency_spec.rb
+- spec/lib/pipely/deploy/client_spec.rb
 - spec/lib/pipely/graph_builder_spec.rb
 - spec/lib/pipely/reference_list_spec.rb
 - spec/lib/pipely_spec.rb
+- spec/spec_helper.rb