pipely 0.1.5 → 0.2.0

This diff shows the content of publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 96b0105ea802c8495325b0b01df591a8a95369dc
-   data.tar.gz: 02ea6bc3048878f4fd9fed396837179b6cc0302e
+   metadata.gz: e753a1c8b9207a58a13e7f4454b4d777ee2b42ba
+   data.tar.gz: af3d1bbe539a028bcbc58c5e6f1ca2df908f6a41
  SHA512:
-   metadata.gz: 1393a450e4c71d8d9dc5477978fac3546092997f6112608c8bafb04dbc65efd5061f04e5eb232c70a816f05ff88a3e6615613afbf35657e459d44e60a0dc7fe7
-   data.tar.gz: 29cfcc8c63636d99a2de3ccedfd6574f7a67784be90f4d731606a6674477c25c313b8d7e30a72e9019012700af55c5c8de3962eefa568560981d8b75cec070ec
+   metadata.gz: 56fdf912f37ebb81a7f17a7a8a6c5686228165045d5917a48f12621b4a83357b7bb73fcb0e3562c0a0df9f53ec8d5131d484056d0f8283a819b1f04daf0c46f9
+   data.tar.gz: 6442124a52915c429e9a2e0ada617b687c10c5db887b2c84f1b77f3a96ec1e4e7a0cd789240e594f82807ab5821ebf1f973960654a35a7f29c7375cc4d2d1708
lib/pipely/build.rb ADDED
@@ -0,0 +1,31 @@
+ require 'pipely/build/definition'
+ require 'pipely/build/template'
+ require 'pipely/build/daily_scheduler'
+ require 'pipely/build/right_now_scheduler'
+ require 'pipely/build/s3_path_builder'
+ require 'pipely/build/environment_config'
+
+ module Pipely
+
+   # Turn Templates+config into a deployable Definition.
+   #
+   module Build
+
+     def self.build_definition(template, environment, config_path)
+       env = environment.to_sym
+       config = EnvironmentConfig.load(config_path, env)
+
+       case env
+       when :production
+         s3_prefix = "production/#{config[:namespace]}"
+         scheduler = DailyScheduler.new
+       when :staging
+         s3_prefix = "staging/#{`whoami`.strip}/#{config[:namespace]}"
+         scheduler = RightNowScheduler.new
+       end
+
+       Definition.new(template, env, s3_prefix, scheduler, config)
+     end
+
+   end
+ end
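
Pipely::Build.build_definition is the entry point to this new module: it loads the environment's config, picks an S3 prefix and scheduler, and wraps everything in a Definition. A minimal usage sketch (the template file and config path below are hypothetical, not part of the gem):

require 'pipely/build'

# Assumes a local ERB template and a YAML config with 'production' and
# 'staging' sections (see EnvironmentConfig below).
template = Pipely::Build::Template.new(File.read('pipeline.json.erb'))

definition = Pipely::Build.build_definition(template, 'staging', 'config/pipeline.yml')

definition.pipeline_name  # => value of the config's :name key
definition.to_json        # => rendered pipeline definition JSON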
lib/pipely/build/daily_scheduler.rb ADDED
@@ -0,0 +1,39 @@
+ require 'date'
+
+ module Pipely
+   module Build
+
+     # Compute schedule attributes for a pipeline that runs once a day at a
+     # set time.
+     #
+     class DailyScheduler
+
+       def initialize(start_time="11:00:00")
+         @start_time = start_time
+       end
+
+       def period
+         '24 hours'
+       end
+
+       def start_date_time
+         date = Time.now.utc.to_date
+
+         # if start_time already happened today, wait for tomorrow's start_time
+         now_time = Time.now.utc.strftime('%H:%M:%S')
+         date += 1 if now_time >= @start_time
+
+         date.strftime("%Y-%m-%dT#{@start_time}")
+       end
+
+       def to_hash
+         {
+           :period => period,
+           :start_date_time => start_date_time
+         }
+       end
+
+     end
+
+   end
+ end
lib/pipely/build/definition.rb ADDED
@@ -0,0 +1,26 @@
+ module Pipely
+   module Build
+
+     # Represent a pipeline definition, built from a Template and some config.
+     #
+     class Definition < Struct.new(:template, :env, :s3_prefix, :scheduler, :config)
+       def pipeline_name
+         config[:name]
+       end
+
+       def s3_path_builder
+         S3PathBuilder.new(config[:s3].merge(prefix: s3_prefix))
+       end
+
+       def to_json
+         template.apply_config(:environment => env)
+         template.apply_config(config)
+         template.apply_config(s3_path_builder.to_hash)
+         template.apply_config(scheduler.to_hash)
+
+         template.to_json
+       end
+     end
+
+   end
+ end
lib/pipely/build/environment_config.rb ADDED
@@ -0,0 +1,34 @@
+ require 'yaml'
+
+ module Pipely
+   module Build
+
+     # Work with YAML config files that contain parallel configs for various
+     # environments.
+     #
+     class EnvironmentConfig < Hash
+
+       def self.load(filename, environment)
+         raw = YAML.load_file(filename)[environment.to_s]
+         load_from_hash(raw)
+       end
+
+       def self.load_from_hash(attributes)
+         config = new
+
+         attributes.each do |k, v|
+           case v
+           when Hash
+             config[k.to_sym] = load_from_hash(v)
+           else
+             config[k.to_sym] = v.clone
+           end
+         end
+
+         config
+       end
+
+     end
+
+   end
+ end
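
EnvironmentConfig selects one environment's subtree from a YAML file and symbolizes its keys recursively. A sketch against a hypothetical config file:

require 'pipely/build/environment_config'

# Assumes config/pipeline.yml contains something like:
#
#   production:
#     name: MyPipeline
#     namespace: my_pipeline
#     s3:
#       assets: my-asset-bucket
#       logs: my-log-bucket
#       steps: my-step-bucket
#
config = Pipely::Build::EnvironmentConfig.load('config/pipeline.yml', :production)

config[:name]         # => "MyPipeline"
config[:s3][:assets]  # => "my-asset-bucket" (nested hashes are symbolized too)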
lib/pipely/build/right_now_scheduler.rb ADDED
@@ -0,0 +1,27 @@
+ module Pipely
+   module Build
+
+     # Compute schedule attributes for a pipeline that should run immediately
+     # after being deployed.
+     #
+     class RightNowScheduler
+
+       def period
+         '12 hours'
+       end
+
+       def start_date_time
+         Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S")
+       end
+
+       def to_hash
+         {
+           :period => period,
+           :start_date_time => start_date_time
+         }
+       end
+
+     end
+
+   end
+ end
lib/pipely/build/s3_path_builder.rb ADDED
@@ -0,0 +1,52 @@
+ module Pipely
+   module Build
+
+     # Builds paths to assets, logs, and steps that are on S3.
+     #
+     class S3PathBuilder
+
+       attr_reader :assets_bucket, :logs_bucket, :steps_bucket
+
+       START_TIME = "\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}"
+
+       def initialize(options)
+         @assets_bucket = options[:assets]
+         @logs_bucket = options[:logs]
+         @steps_bucket = options[:steps]
+         @s3prefix = options[:prefix]
+       end
+
+       def s3_log_prefix
+         "s3://#{@logs_bucket}/#{@s3prefix}/#{START_TIME}"
+       end
+
+       def s3_step_prefix
+         "s3://#{@steps_bucket}/#{@s3prefix}"
+       end
+
+       def s3n_step_prefix
+         "s3n://#{@steps_bucket}/#{@s3prefix}"
+       end
+
+       def s3_asset_prefix
+         "s3://#{@assets_bucket}/#{@s3prefix}/#{START_TIME}"
+       end
+
+       def s3n_asset_prefix
+         "s3n://#{@assets_bucket}/#{@s3prefix}/#{START_TIME}"
+       end
+
+       def to_hash
+         {
+           :s3_log_prefix => s3_log_prefix,
+           :s3_step_prefix => s3_step_prefix,
+           :s3n_step_prefix => s3n_step_prefix,
+           :s3_asset_prefix => s3_asset_prefix,
+           :s3n_asset_prefix => s3n_asset_prefix
+         }
+       end
+
+     end
+
+   end
+ end
lib/pipely/build/template.rb ADDED
@@ -0,0 +1,43 @@
+ require 'active_support/core_ext/hash'
+ require 'erb'
+
+ require 'pipely/build/template_helpers'
+
+ module Pipely
+   module Build
+
+     # An ERB template that can be interpolated with config hashes to render a
+     # deployable pipeline definition.
+     #
+     class Template
+       include TemplateHelpers
+
+       def initialize(source)
+         @source = source
+         @config = {}
+       end
+
+       def apply_config(attributes)
+         @config.merge!(attributes.symbolize_keys)
+       end
+
+       def to_json
+         ERB.new(@source).result(binding)
+       end
+
+       def respond_to_missing?(method_name, include_private=false)
+         @config.keys.include?(method_name.to_sym) || super
+       end
+
+       def method_missing(method_name, *args, &block)
+         if @config.keys.include?(method_name)
+           @config[method_name]
+         else
+           super
+         end
+       end
+
+     end
+
+   end
+ end
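
A Template accumulates config across apply_config calls and exposes each key to the ERB binding via method_missing. A small sketch with made-up source and keys:

require 'pipely/build/template'

template = Pipely::Build::Template.new(
  '{ "name": "<%= name %>", "start": "<%= start_date_time %>" }'
)

template.apply_config('name' => 'MyPipeline')  # string keys are symbolized
template.apply_config(:start_date_time => '2013-12-31T11:00:00')

template.to_json
# => '{ "name": "MyPipeline", "start": "2013-12-31T11:00:00" }'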
lib/pipely/build/template_helpers.rb ADDED
@@ -0,0 +1,53 @@
+ module Pipely
+   module Build
+
+     # Helper methods used by ERB templates.
+     #
+     module TemplateHelpers
+
+       def s3_asset_path(path)
+         "#{s3_asset_prefix if '/' == path[0]}#{path}"
+       end
+
+       def s3n_asset_path(path)
+         "#{s3n_asset_prefix if '/' == path[0]}#{path}"
+       end
+
+       def s3n_step_path(path)
+         "#{s3n_step_prefix if '/' == path[0]}#{path}"
+       end
+
+       def streaming_hadoop_step(options)
+         parts = [ '/home/hadoop/contrib/streaming/hadoop-streaming.jar' ]
+
+         Array(options[:input]).each do |input|
+           parts += [ '-input', s3n_asset_path(input) ]
+         end
+
+         Array(options[:output]).each do |output|
+           parts += [ '-output', s3_asset_path(output) ]
+         end
+
+         Array(options[:mapper]).each do |mapper|
+           parts += [ '-mapper', s3n_step_path(mapper) ]
+         end
+
+         Array(options[:reducer]).each do |reducer|
+           parts += [ '-reducer', s3n_step_path(reducer) ]
+         end
+
+         Array(options[:cache_file]).each do |cache_file|
+           parts += [ '-cacheFile', s3n_asset_path(cache_file) ]
+         end
+
+         (options[:env] || {}).each do |name, value|
+           parts += [ '-cmdenv', "#{name}=#{value}" ]
+         end
+
+         parts.join(',')
+       end
+
+     end
+
+   end
+ end
lib/pipely/deploy.rb ADDED
@@ -0,0 +1 @@
+ require 'pipely/deploy/client'
lib/pipely/deploy/client.rb ADDED
@@ -0,0 +1,83 @@
+ require 'fog'
+ require 'json'
+ require 'logger'
+ require 'tempfile'
+ require 'uuidtools'
+
+ module Pipely
+   module Deploy
+
+     # Client for managing deployment of rendered definitions.
+     #
+     class Client
+
+       # Generic error representing failure to deploy a rendered definition.
+       class PipelineDeployerError < RuntimeError; end
+
+       def initialize(log=nil)
+         @log = log || Logger.new(STDOUT)
+         @data_pipelines = Fog::AWS::DataPipeline.new
+       end
+
+       def deploy_pipeline(pipeline_name, definition)
+         # Get a list of all existing pipelines
+         pipeline_ids = existing_pipelines(pipeline_name)
+         @log.info("#{pipeline_ids.count} existing pipelines: #{pipeline_ids}")
+
+         # Create new pipeline
+         created_pipeline_id = create_pipeline(pipeline_name, definition)
+         @log.info("Created pipeline id '#{created_pipeline_id}'")
+
+         # Delete old pipelines
+         pipeline_ids.each do |pipeline_id|
+           begin
+             delete_pipeline(pipeline_id)
+             @log.info("Deleted pipeline '#{pipeline_id}'")
+
+           rescue PipelineDeployerError => error
+             @log.warn(error)
+           end
+         end
+       end
+
+       def existing_pipelines(pipeline_name)
+         ids = []
+         marker = nil
+
+         begin
+           # Pass the previous page's marker back so pagination advances
+           # (assumes fog's list_pipelines accepts an options hash with 'marker').
+           result = @data_pipelines.list_pipelines(marker ? { 'marker' => marker } : {})
+           marker = result['marker']
+
+           ids += result['pipelineIdList'].
+             select { |p| p['name'] == pipeline_name }.
+             map { |p| p['id'] }
+
+         end while (result['hasMoreResults'] && marker)
+
+         ids
+       end
+
+       def create_pipeline(pipeline_name, definition)
+         definition_objects = JSON.parse(definition)['objects']
+
+         unique_id = UUIDTools::UUID.random_create
+
+         created_pipeline = @data_pipelines.pipelines.create(
+           unique_id: unique_id,
+           name: pipeline_name
+         )
+
+         created_pipeline.put(definition_objects)
+         created_pipeline.activate
+
+         created_pipeline.id
+       end
+
+       def delete_pipeline(pipeline_id)
+         @data_pipelines.pipelines.get(pipeline_id).destroy
+       end
+
+     end
+   end
+ end
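
A sketch of driving the client directly, outside of Rake (assumes Fog can find AWS credentials, and that the definition JSON was rendered elsewhere, e.g. by Pipely::Build::Definition#to_json; the file path is a placeholder):

require 'pipely/deploy'

client = Pipely::Deploy::Client.new
definition_json = File.read('definitions/my-pipeline.json')  # hypothetical path

# Creates and activates a new "MyPipeline" pipeline, then deletes any
# older pipelines with the same name.
client.deploy_pipeline('MyPipeline', definition_json)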
lib/pipely/tasks.rb ADDED
@@ -0,0 +1,9 @@
+ require 'pipely/tasks/upload_steps'
+ require 'pipely/tasks/deploy'
+ require 'pipely/tasks/graph'
+ require 'pipely/tasks/definition'
+
+ module Pipely
+   module Tasks
+   end
+ end
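
The task classes that follow are Rake task libraries in the style of RSpec::Core::RakeTask: construct them in a Rakefile and assign attributes in the block. A hypothetical Rakefile wiring all four together (the template, config path, and bucket names are illustrative, not part of the gem):

require 'pipely/build'
require 'pipely/tasks'

definition = Pipely::Build.build_definition(
  Pipely::Build::Template.new(File.read('pipeline.json.erb')),
  ENV['TARGET_ENV'] || 'staging',
  'config/pipeline.yml'
)

Pipely::Tasks::Definition.new { |t| t.definition = definition }
Pipely::Tasks::Graph.new      { |t| t.definition = definition }
Pipely::Tasks::Deploy.new     { |t| t.definition = definition }

Pipely::Tasks::UploadSteps.new do |t|
  t.s3_bucket_name = 'step-bucket'        # placeholder bucket
  t.s3_path        = 'my_pipeline/steps'  # placeholder path
end

# rake definition  -> writes the rendered JSON under definitions/
# rake graph       -> writes a Graphviz PNG under graphs/
# rake deploy      -> runs upload_steps, then deploys the pipeline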
lib/pipely/tasks/definition.rb ADDED
@@ -0,0 +1,69 @@
+ require 'rake'
+ require 'rake/tasklib'
+ require 'pipely'
+
+ module Pipely
+   module Tasks
+     class Definition < ::Rake::TaskLib
+       include ::Rake::DSL if defined?(::Rake::DSL)
+
+       # Name of task.
+       #
+       # default:
+       #   :definition
+       attr_accessor :name
+
+       # Path where rendered definitions are written.
+       #
+       # default:
+       #   "definitions"
+       attr_accessor :path
+
+       # Pipeline definition instance
+       attr_accessor :definition
+
+       # Use verbose output. If this is set to true, the task will print the
+       # path of the definition file as it is generated.
+       #
+       # default:
+       #   true
+       attr_accessor :verbose
+
+       def initialize(*args, &task_block)
+         setup_ivars(args)
+
+         directory path
+
+         desc "Renders the full pipeline definition to a JSON file"
+         task name => path do |_, task_args|
+           RakeFileUtils.send(:verbose, verbose) do
+             if task_block
+               task_block.call(*[self, task_args].slice(0, task_block.arity))
+             end
+
+             run_task verbose
+           end
+         end
+       end
+
+       def setup_ivars(args)
+         @name = args.shift || :definition
+         @verbose = true
+         @path = "definitions"
+       end
+
+       def run_task(verbose)
+         puts "Generating #{target_filename}" if verbose
+
+         File.open(target_filename, 'w') do |file|
+           file.write(definition.to_json)
+         end
+       end
+
+       def target_filename
+         "#{path}/#{definition.base_filename}.json"
+       end
+
+     end
+   end
+ end
lib/pipely/tasks/deploy.rb ADDED
@@ -0,0 +1,58 @@
+ require 'rake'
+ require 'rake/tasklib'
+ require 'pipely/deploy'
+
+ module Pipely
+   module Tasks
+     class Deploy < ::Rake::TaskLib
+       include ::Rake::DSL if defined?(::Rake::DSL)
+
+       # Name of task.
+       #
+       # default:
+       #   :deploy
+       attr_accessor :name
+
+       # Pipeline definition instance
+       attr_accessor :definition
+
+       # Use verbose output. If this is set to true, the task will print the
+       # local and remote paths of each step file it uploads to S3.
+       #
+       # default:
+       #   true
+       attr_accessor :verbose
+
+       def initialize(*args, &task_block)
+         setup_ivars(args)
+
+         desc "Deploy pipeline" unless ::Rake.application.last_comment
+
+         task name, *args do |_, task_args|
+           RakeFileUtils.send(:verbose, verbose) do
+             if task_block
+               task_block.call(*[self, task_args].slice(0, task_block.arity))
+             end
+
+             run_task verbose
+           end
+         end
+       end
+
+       def setup_ivars(args)
+         @name = args.shift || :deploy
+         @verbose = true
+       end
+
+       def run_task(verbose)
+         Rake::Task["upload_steps"].invoke
+
+         Pipely::Deploy::Client.new.deploy_pipeline(
+           definition.pipeline_name,
+           definition.to_json
+         )
+       end
+
+     end
+   end
+ end
lib/pipely/tasks/graph.rb ADDED
@@ -0,0 +1,75 @@
+ require 'rake'
+ require 'rake/tasklib'
+ require 'pipely'
+
+ module Pipely
+   module Tasks
+     class Graph < ::Rake::TaskLib
+       include ::Rake::DSL if defined?(::Rake::DSL)
+
+       # Name of task.
+       #
+       # default:
+       #   :graph
+       attr_accessor :name
+
+       # Path to write graph images to.
+       #
+       # default:
+       #   "graphs"
+       attr_accessor :path
+
+       # Pipeline definition instance
+       attr_accessor :definition
+
+       # Use verbose output. If this is set to true, the task will print the
+       # path of each graph image it generates.
+       #
+       # default:
+       #   true
+       attr_accessor :verbose
+
+       def initialize(*args, &task_block)
+         setup_ivars(args)
+
+         # create the `path` directory if it doesn't exist
+         directory path
+
+         namespace name do
+           task :full => path do |_, task_args|
+             RakeFileUtils.send(:verbose, verbose) do
+               if task_block
+                 task_block.call(*[self, task_args].slice(0, task_block.arity))
+               end
+
+               run_task verbose
+             end
+           end
+
+           task :open => :full do
+             `open #{target_filename}`
+           end
+         end
+
+         desc "Graphs the full pipeline definition using Graphviz"
+         task name => "#{name}:full"
+       end
+
+       def setup_ivars(args)
+         @name = args.shift || :graph
+         @verbose = true
+         @path = "graphs"
+       end
+
+       def run_task(verbose)
+         puts "Generating #{target_filename}" if verbose
+         Pipely.draw(definition.to_json, target_filename)
+       end
+
+       def target_filename
+         "#{path}/#{definition.base_filename}.png"
+       end
+
+     end
+   end
+ end
lib/pipely/tasks/upload_steps.rb ADDED
@@ -0,0 +1,86 @@
+ require 'rake'
+ require 'rake/tasklib'
+ require 'fog'
+
+ module Pipely
+   module Tasks
+     class UploadSteps < ::Rake::TaskLib
+       include ::Rake::DSL if defined?(::Rake::DSL)
+
+       # Name of task.
+       #
+       # default:
+       #   :upload_steps
+       attr_accessor :name
+
+       # Local path to where the step files are.
+       #
+       # default:
+       #   "steps"
+       attr_accessor :local_path
+
+       # Name of S3 bucket to upload steps to.
+       attr_accessor :s3_bucket_name
+
+       # Path within S3 bucket to upload steps to.
+       attr_accessor :s3_path
+
+       # Use verbose output. If this is set to true, the task will print the
+       # local and remote paths of each step file it uploads to S3.
+       #
+       # default:
+       #   true
+       attr_accessor :verbose
+
+       def initialize(*args, &task_block)
+         setup_ivars(args)
+
+         unless ::Rake.application.last_comment
+           desc "Upload Data Pipeline steps to S3"
+         end
+
+         task name, *args do |_, task_args|
+           RakeFileUtils.send(:verbose, verbose) do
+             if task_block
+               task_block.call(*[self, task_args].slice(0, task_block.arity))
+             end
+
+             run_task verbose
+           end
+         end
+       end
+
+       def setup_ivars(args)
+         @name = args.shift || :upload_steps
+         @verbose = true
+         @local_path = "steps"
+       end
+
+       def run_task(verbose)
+         with_bucket do |directory|
+           step_files.each do |file_name|
+             dest = "#{s3_path}/#{File.basename(file_name)}"
+             puts "uploading #{dest}" if verbose
+             directory.files.create(key: dest, body: File.read(file_name))
+           end
+         end
+       end
+
+       private
+
+       def with_bucket
+         storage = Fog::Storage.new({ provider: 'AWS' })
+         if directory = storage.directories.detect { |d| d.key == s3_bucket_name }
+           yield(directory)
+         else
+           raise "Couldn't find S3 bucket '#{s3_bucket_name}'"
+         end
+       end
+
+       def step_files
+         FileList.new(File.join(local_path, "*"))
+       end
+
+     end
+   end
+ end
lib/pipely/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Pipely
-   VERSION = "0.1.5" unless defined?(::DataPipelineGraphviz::VERSION)
+   VERSION = "0.2.0" unless defined?(::DataPipelineGraphviz::VERSION)
  end
spec/lib/pipely/build/daily_scheduler_spec.rb ADDED
@@ -0,0 +1,34 @@
+ require 'spec_helper'
+ require 'pipely/build/daily_scheduler'
+
+ describe Pipely::Build::DailyScheduler do
+
+   let(:start_time) { "11:00:00" }
+
+   subject { described_class.new(start_time) }
+
+   describe "#period" do
+     it "is '24 hours'" do
+       expect(subject.period).to eq('24 hours')
+     end
+   end
+
+   describe "#start_date_time" do
+     context "if the start time has already happened today in UTC" do
+       it "chooses the start time tomorrow" do
+         Timecop.freeze(Time.utc(2013, 6, 12, 16, 12, 30)) do
+           expect(subject.start_date_time).to eq("2013-06-13T11:00:00")
+         end
+       end
+     end
+
+     context "if the start time has not happened yet today in UTC" do
+       it "chooses the start time today" do
+         Timecop.freeze(Time.utc(2013, 6, 13, 4, 12, 30)) do
+           expect(subject.start_date_time).to eq("2013-06-13T11:00:00")
+         end
+       end
+     end
+   end
+
+ end
spec/lib/pipely/build/right_now_scheduler_spec.rb ADDED
@@ -0,0 +1,20 @@
+ require 'spec_helper'
+ require 'pipely/build/right_now_scheduler'
+
+ describe Pipely::Build::RightNowScheduler do
+
+   describe "#period" do
+     it "is '12 hours'" do
+       expect(subject.period).to eq('12 hours')
+     end
+   end
+
+   describe "#start_date_time" do
+     it "chooses the current time as the start time" do
+       Timecop.freeze(Time.utc(2013, 6, 12, 16, 12, 30)) do
+         expect(subject.start_date_time).to eq("2013-06-12T16:12:30")
+       end
+     end
+   end
+
+ end
spec/lib/pipely/build/s3_path_builder_spec.rb ADDED
@@ -0,0 +1,46 @@
+ require 'pipely/build/s3_path_builder'
+
+ describe Pipely::Build::S3PathBuilder do
+
+   subject {
+     described_class.new(
+       logs: 'log-bucket',
+       steps: 'step-bucket',
+       assets: 'asset-bucket',
+       prefix: 'run-prefix',
+     )
+   }
+
+   its(:s3_log_prefix) {
+     should eq("s3://log-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
+   }
+
+   its(:s3_step_prefix) {
+     should eq("s3://step-bucket/run-prefix")
+   }
+
+   its(:s3n_step_prefix) {
+     should eq("s3n://step-bucket/run-prefix")
+   }
+
+   its(:s3_asset_prefix) {
+     should eq("s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
+   }
+
+   its(:s3n_asset_prefix) {
+     should eq("s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
+   }
+
+   describe "#to_hash" do
+     it 'includes the necessary keys for supplying config to a Template' do
+       expect(subject.to_hash.keys).to match_array([
+         :s3_log_prefix,
+         :s3_step_prefix,
+         :s3n_step_prefix,
+         :s3_asset_prefix,
+         :s3n_asset_prefix,
+       ])
+     end
+   end
+
+ end
spec/lib/pipely/build/template_spec.rb ADDED
@@ -0,0 +1,86 @@
+ require 'pipely/build/template'
+ require 'pipely/build/s3_path_builder'
+
+ describe Pipely::Build::Template do
+   let(:source) { "some test json <%= foo %>" }
+
+   subject { described_class.new(source) }
+
+   context 'given some configuration' do
+     let(:foo) { 'asdfgwrytqfadfa' }
+     let(:expected_json) { "some test json #{foo}" }
+
+     before do
+       subject.apply_config({ foo: foo })
+     end
+
+     its(:to_json) { should eq(expected_json) }
+   end
+
+   describe "#streaming_hadoop_step(options)" do
+     let(:s3_path_builder) {
+       Pipely::Build::S3PathBuilder.new(
+         logs: 'log-bucket',
+         steps: 'step-bucket',
+         assets: 'asset-bucket',
+         prefix: 'run-prefix'
+       )
+     }
+
+     before do
+       subject.apply_config(s3_path_builder.to_hash)
+     end
+
+     it "builds a streaming hadoop step" do
+       step = subject.streaming_hadoop_step(
+         :input => '/input_dir/',
+         :output => '/output_dir/',
+         :mapper => '/mapper.rb',
+         :reducer => '/reducer.rb'
+       )
+
+       expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,s3n://step-bucket/run-prefix/reducer.rb")
+     end
+
+     context "given an array of inputs" do
+       it 'renders an -input option for each input path' do
+         step = subject.streaming_hadoop_step(
+           :input => ['/input_dir/', '/input_dir2/'],
+           :output => '/output_dir/',
+           :mapper => '/mapper.rb',
+           :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
+         )
+
+         expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir2/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
+       end
+     end
+
+     context "given a cacheFile" do
+       it 'appends a -cacheFile option built from the asset prefix' do
+         step = subject.streaming_hadoop_step(
+           :input => '/input_dir/',
+           :output => '/output_dir/',
+           :cache_file => '/cache_file#cache_file',
+           :mapper => '/mapper.rb',
+           :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
+         )
+
+         expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer,-cacheFile,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/cache_file#cache_file")
+       end
+     end
+
+     context "given the IdentityReducer" do
+       it 'points to the IdentityReducer correctly (not as an S3 URL)' do
+         step = subject.streaming_hadoop_step(
+           :input => '/input_dir/',
+           :output => '/output_dir/',
+           :mapper => '/mapper.rb',
+           :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
+         )
+
+         expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
+       end
+     end
+   end
+
+ end
spec/lib/pipely/build_spec.rb ADDED
@@ -0,0 +1,5 @@
+ require 'pipely/build'
+
+ describe Pipely::Build do
+
+ end
spec/lib/pipely/deploy/client_spec.rb ADDED
@@ -0,0 +1,28 @@
+ require 'spec_helper'
+ require 'pipely/deploy'
+
+ describe Pipely::Deploy::Client do
+
+   describe "#deploy_pipeline" do
+     let(:existing_pipeline_ids) { ["pipeline-one", "pipeline-two"] }
+     let(:new_pipeline_id) { "pipeline-three" }
+     let(:pipeline_name) { "MyPipeline" }
+     let(:definition) { "pipeline json" }
+
+     it "gets a list of pipelines, creates a new one, and deletes the others" do
+       subject.should_receive(:existing_pipelines).
+         and_return(existing_pipeline_ids)
+
+       subject.should_receive(:create_pipeline).
+         with(pipeline_name, anything()).
+         and_return(new_pipeline_id)
+
+       existing_pipeline_ids.each do |id|
+         subject.should_receive(:delete_pipeline).with(id)
+       end
+
+       subject.deploy_pipeline(pipeline_name, definition)
+     end
+   end
+
+ end
spec/spec_helper.rb ADDED
@@ -0,0 +1,9 @@
+ require 'timecop'
+ require 'fog'
+
+ Fog.credentials = {
+   aws_access_key_id: "xxx",
+   aws_secret_access_key: "xxx"
+ }
+
+ $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: pipely
  version: !ruby/object:Gem::Version
-   version: 0.1.5
+   version: 0.2.0
  platform: ruby
  authors:
  - Matt Gillooly
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-12-17 00:00:00.000000000 Z
+ date: 2013-12-31 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: ruby-graphviz
@@ -58,14 +58,14 @@ dependencies:
      requirements:
      - - ~>
        - !ruby/object:Gem::Version
-         version: 1.18.0
+         version: 1.19.0
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - ~>
        - !ruby/object:Gem::Version
-         version: 1.18.0
+         version: 1.19.0
  - !ruby/object:Gem::Dependency
    name: unf
    requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,34 @@ dependencies:
      - - '>='
        - !ruby/object:Gem::Version
          version: '0'
+ - !ruby/object:Gem::Dependency
+   name: uuidtools
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: activesupport
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
  - !ruby/object:Gem::Dependency
    name: rspec
    requirement: !ruby/object:Gem::Requirement
@@ -108,6 +136,20 @@ dependencies:
      - - '>='
        - !ruby/object:Gem::Version
          version: '0'
+ - !ruby/object:Gem::Dependency
+   name: timecop
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '>='
+       - !ruby/object:Gem::Version
+         version: '0'
  description:
  email:
  - matt@swipely.com
@@ -116,24 +158,46 @@ executables:
  extensions: []
  extra_rdoc_files: []
  files:
+ - lib/pipely/build/daily_scheduler.rb
+ - lib/pipely/build/definition.rb
+ - lib/pipely/build/environment_config.rb
+ - lib/pipely/build/right_now_scheduler.rb
+ - lib/pipely/build/s3_path_builder.rb
+ - lib/pipely/build/template.rb
+ - lib/pipely/build/template_helpers.rb
+ - lib/pipely/build.rb
  - lib/pipely/component.rb
  - lib/pipely/definition.rb
  - lib/pipely/dependency.rb
+ - lib/pipely/deploy/client.rb
+ - lib/pipely/deploy.rb
  - lib/pipely/fog_client.rb
  - lib/pipely/graph_builder.rb
  - lib/pipely/live_pipeline.rb
  - lib/pipely/reference_list.rb
  - lib/pipely/runs_report.rb
+ - lib/pipely/tasks/definition.rb
+ - lib/pipely/tasks/deploy.rb
+ - lib/pipely/tasks/graph.rb
+ - lib/pipely/tasks/upload_steps.rb
+ - lib/pipely/tasks.rb
  - lib/pipely/version.rb
  - lib/pipely.rb
  - Rakefile
  - README.md
+ - spec/lib/pipely/build/daily_scheduler_spec.rb
+ - spec/lib/pipely/build/right_now_scheduler_spec.rb
+ - spec/lib/pipely/build/s3_path_builder_spec.rb
+ - spec/lib/pipely/build/template_spec.rb
+ - spec/lib/pipely/build_spec.rb
  - spec/lib/pipely/component_spec.rb
  - spec/lib/pipely/definition_spec.rb
  - spec/lib/pipely/dependency_spec.rb
+ - spec/lib/pipely/deploy/client_spec.rb
  - spec/lib/pipely/graph_builder_spec.rb
  - spec/lib/pipely/reference_list_spec.rb
  - spec/lib/pipely_spec.rb
+ - spec/spec_helper.rb
  - bin/pipely
  homepage: http://github.com/swipely/pipely
  licenses:
@@ -160,9 +224,16 @@ signing_key:
  specification_version: 4
  summary: Generate dependency graphs from pipeline definitions.
  test_files:
+ - spec/lib/pipely/build/daily_scheduler_spec.rb
+ - spec/lib/pipely/build/right_now_scheduler_spec.rb
+ - spec/lib/pipely/build/s3_path_builder_spec.rb
+ - spec/lib/pipely/build/template_spec.rb
+ - spec/lib/pipely/build_spec.rb
  - spec/lib/pipely/component_spec.rb
  - spec/lib/pipely/definition_spec.rb
  - spec/lib/pipely/dependency_spec.rb
+ - spec/lib/pipely/deploy/client_spec.rb
  - spec/lib/pipely/graph_builder_spec.rb
  - spec/lib/pipely/reference_list_spec.rb
  - spec/lib/pipely_spec.rb
+ - spec/spec_helper.rb