pipely 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 96b0105ea802c8495325b0b01df591a8a95369dc
4
- data.tar.gz: 02ea6bc3048878f4fd9fed396837179b6cc0302e
3
+ metadata.gz: e753a1c8b9207a58a13e7f4454b4d777ee2b42ba
4
+ data.tar.gz: af3d1bbe539a028bcbc58c5e6f1ca2df908f6a41
5
5
  SHA512:
6
- metadata.gz: 1393a450e4c71d8d9dc5477978fac3546092997f6112608c8bafb04dbc65efd5061f04e5eb232c70a816f05ff88a3e6615613afbf35657e459d44e60a0dc7fe7
7
- data.tar.gz: 29cfcc8c63636d99a2de3ccedfd6574f7a67784be90f4d731606a6674477c25c313b8d7e30a72e9019012700af55c5c8de3962eefa568560981d8b75cec070ec
6
+ metadata.gz: 56fdf912f37ebb81a7f17a7a8a6c5686228165045d5917a48f12621b4a83357b7bb73fcb0e3562c0a0df9f53ec8d5131d484056d0f8283a819b1f04daf0c46f9
7
+ data.tar.gz: 6442124a52915c429e9a2e0ada617b687c10c5db887b2c84f1b77f3a96ec1e4e7a0cd789240e594f82807ab5821ebf1f973960654a35a7f29c7375cc4d2d1708
@@ -0,0 +1,31 @@
1
require 'pipely/build/definition'
require 'pipely/build/template'
require 'pipely/build/daily_scheduler'
require 'pipely/build/right_now_scheduler'
require 'pipely/build/s3_path_builder'
require 'pipely/build/environment_config'

module Pipely

  # Turn Templates+config into a deployable Definition.
  #
  module Build

    # Build a deployable Definition.
    #
    # template    - a Pipely::Build::Template to interpolate.
    # environment - environment name (string or symbol), e.g. :production.
    # config_path - path to the YAML config file with per-environment sections.
    #
    # Returns a Pipely::Build::Definition.
    def self.build_definition(template, environment, config_path)
      env = environment.to_sym
      config = EnvironmentConfig.load(config_path, env)

      # FIX: was `case environment.to_sym`, needlessly recomputing the symbol
      # already held in `env`.
      case env
      when :production
        s3_prefix = "production/#{config[:namespace]}"
        scheduler = DailyScheduler.new
      when :staging
        # Per-developer prefix so concurrent staging deploys don't collide.
        s3_prefix = "staging/#{`whoami`.strip}/#{config[:namespace]}"
        scheduler = RightNowScheduler.new
      end
      # NOTE(review): any other environment falls through with a nil s3_prefix
      # and scheduler, and Definition.new still succeeds -- confirm callers
      # rely on this rather than expecting an error.

      Definition.new(template, env, s3_prefix, scheduler, config)
    end

  end
end
@@ -0,0 +1,37 @@
1
require 'date'

module Pipely
  module Build

    # Compute schedule attributes for a pipeline that runs once-a-day at a set
    # time.
    #
    class DailyScheduler

      # start_time - UTC time-of-day as a zero-padded "HH:MM:SS" string.
      def initialize(start_time="11:00:00")
        @start_time = start_time
      end

      def period
        '24 hours'
      end

      # The next occurrence of start_time, as "YYYY-MM-DDTHH:MM:SS" (UTC).
      def start_date_time
        now = Time.now.utc

        # FIX: previously combined Date.today (local time zone) with a UTC
        # clock reading, which chose the wrong day whenever the local date
        # differed from the UTC date.
        date = now.to_date

        # Zero-padded "HH:MM:SS" strings compare correctly as strings; if
        # today's start_time has already passed, wait for tomorrow's.
        date += 1 if now.strftime('%H:%M:%S') >= @start_time

        date.strftime("%Y-%m-%dT#{@start_time}")
      end

      # Schedule attributes in the shape Template#apply_config expects.
      def to_hash
        {
          :period => period,
          :start_date_time => start_date_time
        }
      end

    end

  end
end
@@ -0,0 +1,26 @@
1
module Pipely
  module Build

    # A pipeline definition: a Template plus the environment-specific pieces
    # (env, S3 prefix, scheduler, config) needed to render deployable JSON.
    #
    class Definition < Struct.new(:template,:env,:s3_prefix,:scheduler,:config)

      # Human-readable pipeline name, taken from the loaded config.
      def pipeline_name
        config[:name]
      end

      # An S3PathBuilder scoped to this definition's bucket config and prefix.
      def s3_path_builder
        S3PathBuilder.new(config[:s3].merge(prefix: s3_prefix))
      end

      # Interpolate every config source into the template, then render it.
      def to_json
        [
          { :environment => env },
          config,
          s3_path_builder.to_hash,
          scheduler.to_hash,
        ].each { |attributes| template.apply_config(attributes) }

        template.to_json
      end

    end

  end
end
@@ -0,0 +1,34 @@
1
require 'yaml'

module Pipely
  module Build

    # Work with YAML config files that contain parallel configs for various
    # environments.
    #
    class EnvironmentConfig < Hash

      # Load the section of a YAML config file for the given environment.
      #
      # Raises ArgumentError when the file has no such section (previously
      # this surfaced as a confusing NoMethodError on nil).
      def self.load(filename, environment)
        raw = YAML.load_file(filename)[environment.to_s]
        if raw.nil?
          raise ArgumentError,
            "no '#{environment}' section found in #{filename}"
        end
        load_from_hash(raw)
      end

      # Recursively convert a plain Hash into an EnvironmentConfig with
      # symbolized keys and defensively-copied values.
      def self.load_from_hash(attributes)
        config = new

        attributes.each do |k, v|
          config[k.to_sym] =
            case v
            when Hash
              load_from_hash(v)
            else
              # FIX: #clone raises TypeError for immediate values (integers,
              # booleans, nil) on Rubies before 2.4. Fall back to the value
              # itself, which is safe because such values are immutable.
              begin
                v.clone
              rescue TypeError
                v
              end
            end
        end

        config
      end

    end

  end
end
@@ -0,0 +1,27 @@
1
module Pipely
  module Build

    # Compute schedule attributes for a pipeline that should run immediately
    # after being deployed.
    #
    class RightNowScheduler

      # Fixed run period.
      def period
        '12 hours'
      end

      # The current UTC time, formatted "YYYY-MM-DDTHH:MM:SS".
      def start_date_time
        Time.now.utc.strftime('%Y-%m-%dT%H:%M:%S')
      end

      # Schedule attributes in the shape Template#apply_config expects.
      def to_hash
        { :period => period, :start_date_time => start_date_time }
      end

    end

  end
end
@@ -0,0 +1,52 @@
1
module Pipely
  module Build

    # Builds paths to assets, logs, and steps that are on S3.
    #
    class S3PathBuilder

      attr_reader :assets_bucket, :logs_bucket, :steps_bucket

      # Literal AWS Data Pipeline runtime expression (NOT Ruby interpolation):
      # the service expands it into the scheduled start timestamp at run time.
      START_TIME = "\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}"

      def initialize(options)
        @assets_bucket = options[:assets]
        @logs_bucket = options[:logs]
        @steps_bucket = options[:steps]
        @s3prefix = options[:prefix]
      end

      def s3_log_prefix
        bucket_url('s3', @logs_bucket, START_TIME)
      end

      def s3_step_prefix
        bucket_url('s3', @steps_bucket)
      end

      def s3n_step_prefix
        bucket_url('s3n', @steps_bucket)
      end

      def s3_asset_prefix
        bucket_url('s3', @assets_bucket, START_TIME)
      end

      def s3n_asset_prefix
        bucket_url('s3n', @assets_bucket, START_TIME)
      end

      # All prefixes, keyed the way Template config entries expect.
      def to_hash
        {
          :s3_log_prefix => s3_log_prefix,
          :s3_step_prefix => s3_step_prefix,
          :s3n_step_prefix => s3n_step_prefix,
          :s3_asset_prefix => s3_asset_prefix,
          :s3n_asset_prefix => s3n_asset_prefix
        }
      end

      private

      # Join scheme, bucket, prefix, and optional trailing segments into a URL.
      def bucket_url(scheme, bucket, *tail)
        ["#{scheme}://#{bucket}", @s3prefix, *tail].join('/')
      end

    end

  end
end
@@ -0,0 +1,43 @@
1
require 'active_support/core_ext/hash'
require 'erb'

require 'pipely/build/template_helpers'

module Pipely
  module Build

    # An ERB template that can be interpolated with config hashes to render a
    # deployable pipeline definition.
    #
    class Template
      include TemplateHelpers

      # source - the ERB source text of the pipeline definition.
      def initialize(source)
        @source = source
        @config = {}
      end

      # Merge attributes (keys symbolized) into the config exposed to the
      # template via method_missing.
      def apply_config(attributes)
        @config.merge!(attributes.symbolize_keys)
      end

      # Render the template; config keys resolve as methods in this binding.
      def to_json
        ERB.new(@source).result(binding)
      end

      # FIX: was defined as `respond_to_missing` (missing the trailing `?`),
      # so Ruby never invoked it and respond_to? lied about config keys. It
      # also compared String method names against the Hash's Symbol keys,
      # which could never match.
      def respond_to_missing?(method_name, include_private=false)
        @config.key?(method_name.to_sym) || super
      end

      # Expose config entries as reader methods for the ERB binding.
      def method_missing(method_name, *args, &block)
        if @config.key?(method_name)
          @config[method_name]
        else
          super
        end
      end

    end

  end
end
@@ -0,0 +1,53 @@
1
module Pipely
  module Build

    # Helper methods used by ERB templates.
    #
    module TemplateHelpers

      # Prefix bucket-relative (leading-slash) paths with the S3 asset URL;
      # anything else (e.g. a Java class name) passes through untouched.
      def s3_asset_path(path)
        bucket_relative?(path) ? "#{s3_asset_prefix}#{path}" : path
      end

      # Same as s3_asset_path, but with the s3n:// scheme.
      def s3n_asset_path(path)
        bucket_relative?(path) ? "#{s3n_asset_prefix}#{path}" : path
      end

      # Prefix bucket-relative paths with the s3n:// step URL.
      def s3n_step_path(path)
        bucket_relative?(path) ? "#{s3n_step_prefix}#{path}" : path
      end

      # Build the comma-joined argument string for a streaming Hadoop step.
      # Each option may be a scalar or an array; every entry becomes a
      # flag/value pair in a fixed order.
      def streaming_hadoop_step(options)
        jar = '/home/hadoop/contrib/streaming/hadoop-streaming.jar'

        args = []
        args.concat(Array(options[:input]).flat_map { |p| ['-input', s3n_asset_path(p)] })
        args.concat(Array(options[:output]).flat_map { |p| ['-output', s3_asset_path(p)] })
        args.concat(Array(options[:mapper]).flat_map { |p| ['-mapper', s3n_step_path(p)] })
        args.concat(Array(options[:reducer]).flat_map { |p| ['-reducer', s3n_step_path(p)] })
        args.concat(Array(options[:cache_file]).flat_map { |p| ['-cacheFile', s3n_asset_path(p)] })
        args.concat((options[:env] || {}).flat_map { |name, value| ['-cmdenv', "#{name}=#{value}"] })

        ([jar] + args).join(',')
      end

      private

      # True when the path starts with '/', i.e. it is relative to a bucket
      # root rather than being an external identifier.
      def bucket_relative?(path)
        '/' == path[0]
      end

    end

  end
end
@@ -0,0 +1 @@
1
+ require 'pipely/deploy/client'
@@ -0,0 +1,79 @@
1
require 'fog'
require 'json'
require 'logger'
require 'tempfile'
require 'uuidtools'

module Pipely
  module Deploy

    # Client for managing deployment of rendered definitions.
    #
    class Client

      # Generic error representing failure to deploy a rendered definition.
      class PipelineDeployerError < RuntimeError; end

      # log - optional Logger-compatible object (defaults to STDOUT).
      def initialize(log=nil)
        @log = log || Logger.new(STDOUT)
        @data_pipelines = Fog::AWS::DataPipeline.new
      end

      # Create a new pipeline named pipeline_name from the rendered JSON
      # definition, then delete any pre-existing pipelines with that name.
      def deploy_pipeline(pipeline_name, definition)
        # Get a list of all existing pipelines
        pipeline_ids = existing_pipelines(pipeline_name)
        @log.info("#{pipeline_ids.count} existing pipelines: #{pipeline_ids}")

        # Create new pipeline
        created_pipeline_id = create_pipeline(pipeline_name, definition)
        @log.info("Created pipeline id '#{created_pipeline_id}'")

        # Delete old pipelines; failures are logged, not fatal, so one stale
        # pipeline can't abort cleanup of the rest.
        pipeline_ids.each do |pipeline_id|
          begin
            delete_pipeline(pipeline_id)
            @log.info("Deleted pipeline '#{pipeline_id}'")

          rescue PipelineDeployerError => error
            @log.warn(error)
          end
        end
      end

      # IDs of every existing pipeline whose name matches pipeline_name.
      #
      # FIX: the paging loop never passed the marker back to list_pipelines,
      # so with more than one page of results it re-fetched the first page
      # forever. It also used the ad-hoc Fog::AWS[:data_pipeline] account
      # instead of this client's own @data_pipelines connection.
      def existing_pipelines(pipeline_name)
        ids = []
        marker = nil

        loop do
          options = marker ? { 'marker' => marker } : {}
          result = @data_pipelines.list_pipelines(options)

          ids += result['pipelineIdList'].
            select { |p| p['name'] == pipeline_name }.
            map { |p| p['id'] }

          marker = result['marker']
          break unless result['hasMoreResults'] && marker
        end

        ids
      end

      # Create, populate, and activate a new pipeline. Returns its id.
      def create_pipeline(pipeline_name, definition)
        definition_objects = JSON.parse(definition)['objects']

        # Random client token so retries don't create duplicate pipelines.
        unique_id = UUIDTools::UUID.random_create

        created_pipeline = @data_pipelines.pipelines.create(
          unique_id: unique_id,
          name: pipeline_name
        )

        created_pipeline.put(definition_objects)
        created_pipeline.activate

        created_pipeline.id
      end

      # Destroy the pipeline with the given id.
      def delete_pipeline(pipeline_id)
        @data_pipelines.pipelines.get(pipeline_id).destroy
      end

    end
  end
end
@@ -0,0 +1,9 @@
1
require 'pipely/tasks/upload_steps'
require 'pipely/tasks/deploy'
require 'pipely/tasks/graph'
require 'pipely/tasks/definition'

module Pipely
  # Namespace for the gem's Rake task libraries (loaded by the requires above).
  module Tasks
  end
end
@@ -0,0 +1,69 @@
1
+ require 'rake'
2
+ require 'rake/tasklib'
3
+ require 'pipely'
4
+
5
module Pipely
  module Tasks
    # Rake task library that renders a pipeline definition to a JSON file.
    class Definition < ::Rake::TaskLib
      include ::Rake::DSL if defined?(::Rake::DSL)

      # Name of task.
      #
      # default:
      #   :definition
      attr_accessor :name

      # Path where rendered definitions are written.
      #
      # default:
      #   "definitions"
      attr_accessor :path

      # Pipeline definition instance
      attr_accessor :definition

      # Use verbose output. If this is set to true, the task will print the
      # name of each definition file as it is generated.
      #
      # default:
      #   true
      attr_accessor :verbose

      def initialize(*args, &task_block)
        setup_ivars(args)

        # Ensure the output directory exists before the task runs.
        directory path

        # FIX: the description was copy-pasted from the Graph task; this task
        # writes the definition JSON, it does not draw anything.
        desc "Renders the full pipeline definition to a JSON file"
        task name => path do |_, task_args|
          RakeFileUtils.send(:verbose, verbose) do
            # Let the caller's block configure this instance before running.
            if task_block
              task_block.call(*[self, task_args].slice(0, task_block.arity))
            end

            run_task verbose
          end
        end
      end

      # Default the task name, verbosity, and output directory.
      def setup_ivars(args)
        @name = args.shift || :definition
        @verbose = true
        @path = "definitions"
      end

      # Render `definition` to JSON and write it to target_filename.
      def run_task(verbose)
        puts "Generating #{target_filename}" if verbose

        File.open(target_filename, 'w') do |file|
          file.write(definition.to_json)
        end
      end

      # NOTE(review): relies on definition responding to #base_filename, which
      # Pipely::Build::Definition does not visibly define here -- confirm.
      def target_filename
        "#{path}/#{definition.base_filename}.json"
      end

    end
  end
end
@@ -0,0 +1,58 @@
1
+ require 'rake'
2
+ require 'rake/tasklib'
3
+ require 'pipely/deploy'
4
+
5
module Pipely
  module Tasks
    # Rake task library that deploys a rendered pipeline definition to AWS
    # Data Pipeline, uploading its step files first.
    class Deploy < ::Rake::TaskLib
      include ::Rake::DSL if defined?(::Rake::DSL)

      # Name of task.
      #
      # default:
      #   :deploy
      attr_accessor :name

      # Pipeline definition instance
      attr_accessor :definition

      # Use verbose output.
      #
      # default:
      #   true
      attr_accessor :verbose

      def initialize(*args, &task_block)
        setup_ivars(args)

        # FIX: Rake::Application#last_comment was deprecated and removed in
        # Rake 12; #last_description is the long-standing replacement.
        desc "Deploy pipeline" unless ::Rake.application.last_description

        task name, *args do |_, task_args|
          RakeFileUtils.send(:verbose, verbose) do
            # Let the caller's block configure this instance before running.
            if task_block
              task_block.call(*[self, task_args].slice(0, task_block.arity))
            end

            run_task verbose
          end
        end
      end

      # Default the task name and verbosity.
      def setup_ivars(args)
        @name = args.shift || :deploy
        @verbose = true
      end

      # Upload steps, then create the pipeline from the rendered definition.
      def run_task(verbose)
        # NOTE(review): hard-codes the sibling task name "upload_steps";
        # breaks if that task was registered under a custom name -- confirm.
        Rake::Task["upload_steps"].invoke

        Pipely::Deploy::Client.new.deploy_pipeline(
          definition.pipeline_name,
          definition.to_json
        )
      end

    end
  end
end
@@ -0,0 +1,75 @@
1
+ require 'rake'
2
+ require 'rake/tasklib'
3
+ require 'pipely'
4
+
5
module Pipely
  module Tasks
    # Rake task library that renders the pipeline definition as a Graphviz
    # image, with an :open convenience task to view the result.
    class Graph < ::Rake::TaskLib
      include ::Rake::DSL if defined?(::Rake::DSL)

      # Name of task.
      #
      # default:
      #   :graph
      attr_accessor :name

      # Path to write graph images to.
      #
      # default:
      #   "graphs"
      attr_accessor :path

      # Pipeline definition instance
      attr_accessor :definition

      # Use verbose output. When true, the task prints the name of each graph
      # file as it is generated.
      #
      # default:
      #   true
      attr_accessor :verbose

      def initialize(*args, &config_block)
        setup_ivars(args)

        # Make sure the output directory exists before drawing into it.
        directory path

        namespace name do
          task :full => path do |_, task_args|
            RakeFileUtils.send(:verbose, verbose) do
              # Let the caller configure this instance before the task body.
              config_block.call(*[self, task_args].slice(0, config_block.arity)) if config_block

              run_task verbose
            end
          end

          # NOTE(review): `open` is macOS-specific; other platforms would need
          # xdg-open or similar -- confirm intended audience.
          task :open => :full do
            `open #{target_filename}`
          end
        end

        desc "Graphs the full pipeline definition using Graphviz"
        task name => "#{name}:full"
      end

      # Default the task name, verbosity, and output directory.
      def setup_ivars(args)
        @name = args.shift || :graph
        @verbose = true
        @path = "graphs"
      end

      # Draw the rendered definition to target_filename.
      def run_task(verbose)
        puts "Generating #{target_filename}" if verbose
        Pipely.draw(definition.to_json, target_filename)
      end

      # Output image path, derived from the definition's base filename.
      def target_filename
        "#{path}/#{definition.base_filename}.png"
      end

    end
  end
end
@@ -0,0 +1,86 @@
1
+ require 'rake'
2
+ require 'rake/tasklib'
3
+ require 'fog'
4
+
5
module Pipely
  module Tasks
    # Rake task library that uploads local step files to an S3 bucket.
    class UploadSteps < ::Rake::TaskLib
      include ::Rake::DSL if defined?(::Rake::DSL)

      # Name of task.
      #
      # default:
      #   :upload_steps
      attr_accessor :name

      # Local path to where the step files are.
      #
      # default:
      #   "steps"
      attr_accessor :local_path

      # Name of S3 bucket to upload steps to.
      attr_accessor :s3_bucket_name

      # Path within S3 bucket to upload steps to.
      attr_accessor :s3_path

      # Use verbose output. If this is set to true, the task will print the
      # remote path of each step file it uploads to S3.
      #
      # default:
      #   true
      attr_accessor :verbose

      def initialize(*args, &task_block)
        setup_ivars(args)

        # FIX: Rake::Application#last_comment was deprecated and removed in
        # Rake 12; #last_description is the long-standing replacement.
        unless ::Rake.application.last_description
          desc "Upload Data Pipeline steps to S3"
        end

        task name, *args do |_, task_args|
          RakeFileUtils.send(:verbose, verbose) do
            # Let the caller's block configure this instance before running.
            if task_block
              task_block.call(*[self, task_args].slice(0, task_block.arity))
            end

            run_task verbose
          end
        end
      end

      # Default the task name, verbosity, and local step directory.
      def setup_ivars(args)
        @name = args.shift || :upload_steps
        @verbose = true
        @local_path = "steps"
      end

      # Upload every file under local_path into s3_bucket_name/s3_path.
      def run_task(verbose)
        with_bucket do |directory|
          step_files.each do |file_name|
            dest = "#{s3_path}/#{File.basename(file_name)}"
            puts "uploading #{dest}" if verbose
            directory.files.create(key: dest, body: File.read(file_name))
          end
        end
      end

      private

      # Yield the Fog directory for s3_bucket_name, or raise if it is missing.
      def with_bucket
        storage = Fog::Storage.new({ provider: 'AWS' })
        # NOTE(review): #directories enumerates every bucket on the account;
        # storage.directories.get(s3_bucket_name) would avoid the full listing.
        if directory = storage.directories.detect{ |d| d.key == s3_bucket_name }
          yield(directory)
        else
          raise "Couldn't find S3 bucket '#{s3_bucket_name}'"
        end
      end

      # All files directly under local_path.
      def step_files
        FileList.new(File.join(local_path, "*"))
      end

    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Pipely
2
- VERSION = "0.1.5" unless defined?(::DataPipelineGraphviz::VERSION)
2
+ VERSION = "0.2.0" unless defined?(::DataPipelineGraphviz::VERSION)
3
3
  end
@@ -0,0 +1,33 @@
1
require 'pipely/build/daily_scheduler'

# Specs for the once-a-day scheduler. Timecop freezes the clock so the
# before/after-start-time branches are deterministic.
#
# NOTE(review): Timecop is required by spec_helper, which this file does not
# require -- presumably loaded via the project's .rspec; confirm.
describe Pipely::Build::DailyScheduler do

  let(:start_time) { "11:00:00" }

  subject { described_class.new(start_time) }

  describe "#period" do
    it "is '24 hours'" do
      expect(subject.period).to eq('24 hours')
    end
  end

  describe "#start_date_time" do
    context "if the start time has already happened today in UTC" do
      it "chooses the start time tomorrow" do
        # Frozen at 16:12:30 UTC -- after the 11:00:00 start time.
        Timecop.freeze(Time.utc(2013, 6, 12, 16, 12, 30)) do
          expect(subject.start_date_time).to eq("2013-06-13T11:00:00")
        end
      end
    end

    context "if the start time has not happened yet today in UTC" do
      it "chooses the start time today" do
        # Frozen at 04:12:30 UTC -- before the 11:00:00 start time.
        Timecop.freeze(Time.utc(2013, 6, 13, 4, 12, 30)) do
          expect(subject.start_date_time).to eq("2013-06-13T11:00:00")
        end
      end
    end
  end

end
@@ -0,0 +1,19 @@
1
require 'pipely/build/right_now_scheduler'

# Specs for the run-immediately scheduler; Timecop pins the clock so the
# formatted start time is deterministic.
describe Pipely::Build::RightNowScheduler do

  describe "#period" do
    it "is '12 hours'" do
      expect(subject.period).to eq('12 hours')
    end
  end

  describe "#start_date_time" do
    it "chooses the current time as the start time" do
      Timecop.freeze(Time.utc(2013, 6, 12, 16, 12, 30)) do
        expect(subject.start_date_time).to eq("2013-06-12T16:12:30")
      end
    end
  end

end
@@ -0,0 +1,46 @@
1
require 'pipely/build/s3_path_builder'

# Specs for S3PathBuilder. The "\#{format(...)}" fragments below are literal
# AWS Data Pipeline runtime expressions, not Ruby interpolation -- the service
# expands them at execution time.
describe Pipely::Build::S3PathBuilder do

  subject {
    described_class.new(
      logs: 'log-bucket',
      steps: 'step-bucket',
      assets: 'asset-bucket',
      prefix: 'run-prefix',
    )
  }

  its(:s3_log_prefix) {
    should eq("s3://log-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
  }

  its(:s3_step_prefix) {
    should eq("s3://step-bucket/run-prefix")
  }

  its(:s3n_step_prefix) {
    should eq("s3n://step-bucket/run-prefix")
  }

  its(:s3_asset_prefix) {
    should eq("s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
  }

  its(:s3n_asset_prefix) {
    should eq("s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
  }

  describe "#to_hash" do
    it 'includes the necessary keys for supplying config to a Template' do
      expect(subject.to_hash.keys).to match_array([
        :s3_log_prefix,
        :s3_step_prefix,
        :s3n_step_prefix,
        :s3_asset_prefix,
        :s3n_asset_prefix,
      ])
    end
  end

end
@@ -0,0 +1,85 @@
1
require 'pipely/build/template'
# FIX: the spec references S3PathBuilder directly but previously relied on
# load order elsewhere to have required it.
require 'pipely/build/s3_path_builder'

describe Pipely::Build::Template do
  let(:source) { "some test json <%= foo %>" }

  subject { described_class.new(source) }

  context 'given some configuration' do
    let(:foo) { 'asdfgwrytqfadfa' }
    let(:expected_json) { "some test json #{foo}" }

    before do
      subject.apply_config({ foo: foo })
    end

    its(:to_json) { should eq(expected_json) }
  end

  describe "#streaming_hadoop_step(options)" do
    let(:s3_path_builder) {
      Pipely::Build::S3PathBuilder.new(
        logs: 'log-bucket',
        steps: 'step-bucket',
        assets: 'asset-bucket',
        prefix: 'run-prefix'
      )
    }

    before do
      subject.apply_config(s3_path_builder.to_hash)
    end

    it "builds a streaming hadoop step" do
      step = subject.streaming_hadoop_step(
        :input => '/input_dir/',
        :output => '/output_dir/',
        :mapper => '/mapper.rb',
        :reducer => '/reducer.rb'
      )

      expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,s3n://step-bucket/run-prefix/reducer.rb")
    end

    context "given an array of inputs" do
      # FIX: example description was copy-pasted from the IdentityReducer
      # example; this one verifies multiple -input flags.
      it 'builds an -input flag for each input path' do
        step = subject.streaming_hadoop_step(
          :input => ['/input_dir/', '/input_dir2/'],
          :output => '/output_dir/',
          :mapper => '/mapper.rb',
          :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
        )

        expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir2/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
      end
    end

    context "given a cacheFile" do
      # FIX: example description was copy-pasted from the IdentityReducer
      # example; this one verifies the -cacheFile flag.
      it 'appends a -cacheFile flag with the asset S3 URL' do
        step = subject.streaming_hadoop_step(
          :input => '/input_dir/',
          :output => '/output_dir/',
          :cache_file => '/cache_file#cache_file',
          :mapper => '/mapper.rb',
          :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
        )

        expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer,-cacheFile,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/cache_file#cache_file")
      end
    end

    context "given the IdentityReducer" do
      it 'points to the IdentityReducer correctly (not as an S3 URL)' do
        step = subject.streaming_hadoop_step(
          :input => '/input_dir/',
          :output => '/output_dir/',
          :mapper => '/mapper.rb',
          :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
        )

        expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
      end
    end
  end

end
@@ -0,0 +1,3 @@
1
# TODO: Pipely::Build.build_definition has no coverage; this empty describe
# block only asserts that the constant resolves when specs load.
describe Pipely::Build do

end
@@ -0,0 +1,28 @@
1
require 'spec_helper'
require 'pipely/deploy'

# NOTE(review): `subject` instantiates a real Fog::AWS::DataPipeline using the
# dummy credentials from spec_helper -- confirm initialize makes no network
# calls.
describe Pipely::Deploy::Client do

  describe "#deploy_pipeline" do
    let(:existing_pipeline_ids) { ["pipeline-one", "pipeline-two"] }
    let(:new_pipeline_id) { "pipeline-three" }
    let(:pipeline_name) { "MyPipeline" }
    let(:definition) { "pipeline json" }

    # Stubs the AWS-facing collaborators, so this only verifies orchestration
    # order: list existing, create the replacement, delete the stale ones.
    it "gets a list of pipelines, creates a new one, and deletes the others" do
      subject.should_receive(:existing_pipelines).
        and_return(existing_pipeline_ids)

      subject.should_receive(:create_pipeline).
        with(pipeline_name, anything()).
        and_return(new_pipeline_id)

      existing_pipeline_ids.each do |id|
        subject.should_receive(:delete_pipeline).with(id)
      end

      subject.deploy_pipeline(pipeline_name, definition)
    end
  end

end
@@ -0,0 +1,9 @@
1
require 'timecop'
require 'fog'

# Dummy AWS credentials so specs never authenticate against a real account.
# NOTE(review): Fog.mock! is not enabled here -- presumably the specs stub
# network-facing calls themselves; confirm.
Fog.credentials = {
  aws_access_key_id: "xxx",
  aws_secret_access_key: "xxx"
}

# Make lib/ requirable without installing the gem.
$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pipely
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Gillooly
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-17 00:00:00.000000000 Z
11
+ date: 2013-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-graphviz
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: 1.18.0
61
+ version: 1.19.0
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - ~>
67
67
  - !ruby/object:Gem::Version
68
- version: 1.18.0
68
+ version: 1.19.0
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: unf
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,34 @@ dependencies:
80
80
  - - '>='
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: uuidtools
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: activesupport
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
83
111
  - !ruby/object:Gem::Dependency
84
112
  name: rspec
85
113
  requirement: !ruby/object:Gem::Requirement
@@ -108,6 +136,20 @@ dependencies:
108
136
  - - '>='
109
137
  - !ruby/object:Gem::Version
110
138
  version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: timecop
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - '>='
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
111
153
  description:
112
154
  email:
113
155
  - matt@swipely.com
@@ -116,24 +158,46 @@ executables:
116
158
  extensions: []
117
159
  extra_rdoc_files: []
118
160
  files:
161
+ - lib/pipely/build/daily_scheduler.rb
162
+ - lib/pipely/build/definition.rb
163
+ - lib/pipely/build/environment_config.rb
164
+ - lib/pipely/build/right_now_scheduler.rb
165
+ - lib/pipely/build/s3_path_builder.rb
166
+ - lib/pipely/build/template.rb
167
+ - lib/pipely/build/template_helpers.rb
168
+ - lib/pipely/build.rb
119
169
  - lib/pipely/component.rb
120
170
  - lib/pipely/definition.rb
121
171
  - lib/pipely/dependency.rb
172
+ - lib/pipely/deploy/client.rb
173
+ - lib/pipely/deploy.rb
122
174
  - lib/pipely/fog_client.rb
123
175
  - lib/pipely/graph_builder.rb
124
176
  - lib/pipely/live_pipeline.rb
125
177
  - lib/pipely/reference_list.rb
126
178
  - lib/pipely/runs_report.rb
179
+ - lib/pipely/tasks/definition.rb
180
+ - lib/pipely/tasks/deploy.rb
181
+ - lib/pipely/tasks/graph.rb
182
+ - lib/pipely/tasks/upload_steps.rb
183
+ - lib/pipely/tasks.rb
127
184
  - lib/pipely/version.rb
128
185
  - lib/pipely.rb
129
186
  - Rakefile
130
187
  - README.md
188
+ - spec/lib/pipely/build/daily_scheduler_spec.rb
189
+ - spec/lib/pipely/build/right_now_scheduler_spec.rb
190
+ - spec/lib/pipely/build/s3_path_builder_spec.rb
191
+ - spec/lib/pipely/build/template_spec.rb
192
+ - spec/lib/pipely/build_spec.rb
131
193
  - spec/lib/pipely/component_spec.rb
132
194
  - spec/lib/pipely/definition_spec.rb
133
195
  - spec/lib/pipely/dependency_spec.rb
196
+ - spec/lib/pipely/deploy/client_spec.rb
134
197
  - spec/lib/pipely/graph_builder_spec.rb
135
198
  - spec/lib/pipely/reference_list_spec.rb
136
199
  - spec/lib/pipely_spec.rb
200
+ - spec/spec_helper.rb
137
201
  - bin/pipely
138
202
  homepage: http://github.com/swipely/pipely
139
203
  licenses:
@@ -160,9 +224,16 @@ signing_key:
160
224
  specification_version: 4
161
225
  summary: Generate dependency graphs from pipeline definitions.
162
226
  test_files:
227
+ - spec/lib/pipely/build/daily_scheduler_spec.rb
228
+ - spec/lib/pipely/build/right_now_scheduler_spec.rb
229
+ - spec/lib/pipely/build/s3_path_builder_spec.rb
230
+ - spec/lib/pipely/build/template_spec.rb
231
+ - spec/lib/pipely/build_spec.rb
163
232
  - spec/lib/pipely/component_spec.rb
164
233
  - spec/lib/pipely/definition_spec.rb
165
234
  - spec/lib/pipely/dependency_spec.rb
235
+ - spec/lib/pipely/deploy/client_spec.rb
166
236
  - spec/lib/pipely/graph_builder_spec.rb
167
237
  - spec/lib/pipely/reference_list_spec.rb
168
238
  - spec/lib/pipely_spec.rb
239
+ - spec/spec_helper.rb