pipely 0.4.4 → 0.4.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c86ce9ade0efc39bc87e6d01085f2dbebd8d4390
4
- data.tar.gz: 5c8b4bbcc6381da109a134dde715dd747813c3f2
3
+ metadata.gz: f66405d01a1c3f031d6f08b65879f011a219d0cb
4
+ data.tar.gz: 0403bab9c88a0ea033b69c93418932026ef6b177
5
5
  SHA512:
6
- metadata.gz: 79ebd0a5df8fc75894a1a946a3084326b524487ec8bfbf2fb19db131335aef40a4b19a2d1f08f75bd1967429c581b5ac2506b32e30e5d0dec80acdaa4bb83a7e
7
- data.tar.gz: e4d05d61888f1a1196acc18f74c8f0168c641f887710572f505502742fdcb1e03c53abe176bbcca81129f7a38506dda15544f9b9a1bafaad0637e72d57ea5fa6
6
+ metadata.gz: 9efa8a80f4185ab7cbd7768a585164366fe0c412667e9564df8c821d7660bc53efb8bb711c69c08aec1b3ebab2308e81b57397ba883e330fa38f7c4b903faf8f
7
+ data.tar.gz: 78415fb04a95d9b6858651c9594fc882614190fb29364c417a7e58b9a776f0b372d3383bfe4cea37b527b66bae8c8f1269fa43735125869d9ebc527837ba7a0e
@@ -8,6 +8,7 @@ module Pipely
8
8
  attr_reader :assets_bucket, :logs_bucket, :steps_bucket
9
9
 
10
10
  START_TIME = "\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}"
11
+ START_DATE = "\#{format(@scheduledStartTime,'YYYY-MM-dd')}"
11
12
 
12
13
  def initialize(options)
13
14
  @assets_bucket = options[:assets]
@@ -36,6 +37,10 @@ module Pipely
36
37
  "s3n://#{@assets_bucket}/#{@s3prefix}/#{START_TIME}"
37
38
  end
38
39
 
40
+ def s3_shared_asset_prefix
41
+ "s3://#{@assets_bucket}/#{@s3prefix}/shared/#{START_DATE}"
42
+ end
43
+
39
44
  def bucket_relative_s3_asset_prefix
40
45
  "#{@s3prefix}/#{START_TIME}"
41
46
  end
@@ -47,6 +52,7 @@ module Pipely
47
52
  :s3n_step_prefix => s3n_step_prefix,
48
53
  :s3_asset_prefix => s3_asset_prefix,
49
54
  :s3n_asset_prefix => s3n_asset_prefix,
55
+ :s3_shared_asset_prefix => s3_shared_asset_prefix,
50
56
  :bucket_relative_s3_asset_prefix => bucket_relative_s3_asset_prefix,
51
57
  }
52
58
  end
@@ -20,6 +20,14 @@ module Pipely
20
20
  def streaming_hadoop_step(options)
21
21
  parts = [ '/home/hadoop/contrib/streaming/hadoop-streaming.jar' ]
22
22
 
23
+ if jars = options[:lib_jars]
24
+ parts += Array(jars).map { |jar| ['-libjars', "#{jar}"] }.flatten
25
+ end
26
+
27
+ (options[:defs] || {}).each do |name, value|
28
+ parts += ['-D', "#{name}=#{value}"]
29
+ end
30
+
23
31
  Array(options[:input]).each do |input|
24
32
  parts += [ '-input', s3n_asset_path(input) ]
25
33
  end
@@ -28,6 +36,10 @@ module Pipely
28
36
  parts += ['-output', s3_asset_path(output) ]
29
37
  end
30
38
 
39
+ if options[:outputformat]
40
+ parts += ['-outputformat', options[:outputformat] ]
41
+ end
42
+
31
43
  Array(options[:mapper]).each do |mapper|
32
44
  parts += ['-mapper', s3n_step_path(mapper) ]
33
45
  end
@@ -18,13 +18,23 @@ module Pipely
18
18
  @data_pipelines = Fog::AWS::DataPipeline.new
19
19
  end
20
20
 
21
- def deploy_pipeline(pipeline_name, definition)
21
+ def deploy_pipeline(pipeline_basename, definition)
22
+ pipeline_name = [
23
+ ('P' if ENV['env'] == 'production'),
24
+ ENV['USER'],
25
+ pipeline_basename
26
+ ].compact.join(':')
27
+
28
+ tags = { "basename" => pipeline_basename }
29
+
22
30
  # Get a list of all existing pipelines
23
31
  pipeline_ids = existing_pipelines(pipeline_name)
24
32
  @log.info("#{pipeline_ids.count} existing pipelines: #{pipeline_ids}")
25
33
 
26
34
  # Create new pipeline
27
- created_pipeline_id = create_pipeline(pipeline_name, definition)
35
+ created_pipeline_id = create_pipeline(pipeline_name,
36
+ definition,
37
+ tags)
28
38
  @log.info("Created pipeline id '#{created_pipeline_id}'")
29
39
 
30
40
  # Delete old pipelines
@@ -54,14 +64,15 @@ module Pipely
54
64
  ids
55
65
  end
56
66
 
57
- def create_pipeline(pipeline_name, definition)
67
+ def create_pipeline(pipeline_name, definition, tags={})
58
68
  definition_objects = JSON.parse(definition)['objects']
59
69
 
60
70
  unique_id = UUIDTools::UUID.random_create
61
71
 
62
72
  created_pipeline = @data_pipelines.pipelines.create(
63
73
  unique_id: unique_id,
64
- name: pipeline_name
74
+ name: pipeline_name,
75
+ tags: default_tags.merge(tags)
65
76
  )
66
77
 
67
78
  created_pipeline.put(definition_objects)
@@ -74,6 +85,15 @@ module Pipely
74
85
  @data_pipelines.pipelines.get(pipeline_id).destroy
75
86
  end
76
87
 
88
+ private
89
+
90
+ def default_tags
91
+ {
92
+ "environment" => ENV['env'],
93
+ "creator" => ENV['USER']
94
+ }
95
+ end
96
+
77
97
  end
78
98
  end
79
99
  end
@@ -1,3 +1,3 @@
1
1
  module Pipely
2
- VERSION = "0.4.4" unless defined?(::Pipely::VERSION)
2
+ VERSION = "0.4.8" unless defined?(::Pipely::VERSION)
3
3
  end
@@ -31,6 +31,10 @@ describe Pipely::Build::S3PathBuilder do
31
31
  should eq("s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
32
32
  }
33
33
 
34
+ its(:s3_shared_asset_prefix) {
35
+ should eq("s3://asset-bucket/run-prefix/shared/\#{format(@scheduledStartTime,'YYYY-MM-dd')}")
36
+ }
37
+
34
38
  describe "#to_hash" do
35
39
  it 'includes the necessary keys for supplying config to a Template' do
36
40
  expect(subject.to_hash.keys).to match_array([
@@ -39,6 +43,7 @@ describe Pipely::Build::S3PathBuilder do
39
43
  :s3n_step_prefix,
40
44
  :s3_asset_prefix,
41
45
  :s3n_asset_prefix,
46
+ :s3_shared_asset_prefix,
42
47
  :bucket_relative_s3_asset_prefix,
43
48
  ])
44
49
  end
@@ -68,6 +68,20 @@ describe Pipely::Build::Template do
68
68
  end
69
69
  end
70
70
 
71
+ context "given an outputformat" do
72
+ it 'points to the outputformat class (not as an S3 URL)' do
73
+ step = subject.streaming_hadoop_step(
74
+ :input => '/input_dir/',
75
+ :output => '/output_dir/',
76
+ :outputformat => 'com.swipely.foo.outputformat',
77
+ :mapper => '/mapper.rb',
78
+ :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
79
+ )
80
+
81
+ expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-outputformat,com.swipely.foo.outputformat,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
82
+ end
83
+ end
84
+
71
85
  context "given the IdentityReducer" do
72
86
  it 'points to the IdentityReducer correctly (not as an S3 URL)' do
73
87
  step = subject.streaming_hadoop_step(
@@ -80,6 +94,34 @@ describe Pipely::Build::Template do
80
94
  expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
81
95
  end
82
96
  end
97
+
98
+ context "given jar files" do
99
+ it 'loads the file correctly' do
100
+ step = subject.streaming_hadoop_step(
101
+ :input => '/input_dir/',
102
+ :output => '/output_dir/',
103
+ :mapper => '/mapper.rb',
104
+ :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer',
105
+ :lib_jars => [ 'filter.jar', 'filter2.jar' ],
106
+ )
107
+
108
+ expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-libjars,filter.jar,-libjars,filter2.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
109
+ end
110
+ end
111
+
112
+ context "given variables" do
113
+ it 'defines them correctly' do
114
+ step = subject.streaming_hadoop_step(
115
+ :input => '/input_dir/',
116
+ :output => '/output_dir/',
117
+ :mapper => '/mapper.rb',
118
+ :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer',
119
+ :defs => {'name' => 'value'}
120
+ )
121
+
122
+ expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-D,name=value,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
123
+ end
124
+ end
83
125
  end
84
126
 
85
127
  end
@@ -6,7 +6,7 @@ describe Pipely::Deploy::Client do
6
6
  describe "#deploy_pipeline" do
7
7
  let(:existing_pipeline_ids) { ["pipeline-one", "pipeline-two"] }
8
8
  let(:new_pipeline_id) { "pipeline-three" }
9
- let(:pipeline_name) { "MyPipeline" }
9
+ let(:pipeline_basename) { "MyPipeline" }
10
10
  let(:definition) { "pipeline json" }
11
11
 
12
12
  it "gets a list of pipelines, creates a new one, and deletes the others" do
@@ -14,14 +14,17 @@ describe Pipely::Deploy::Client do
14
14
  and_return(existing_pipeline_ids)
15
15
 
16
16
  subject.should_receive(:create_pipeline).
17
- with(pipeline_name, anything()).
17
+ with("#{ENV['USER']}:#{pipeline_basename}",
18
+ anything(),
19
+ hash_including( 'basename' => pipeline_basename )
20
+ ).
18
21
  and_return(new_pipeline_id)
19
22
 
20
23
  existing_pipeline_ids.each do |id|
21
24
  subject.should_receive(:delete_pipeline).with(id)
22
25
  end
23
26
 
24
- subject.deploy_pipeline(pipeline_name, definition)
27
+ subject.deploy_pipeline(pipeline_basename, definition)
25
28
  end
26
29
  end
27
30
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pipely
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Gillooly
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-03 00:00:00.000000000 Z
11
+ date: 2014-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-graphviz
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: 1.21.0
61
+ version: 1.22.0
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: 1.21.0
68
+ version: 1.22.0
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: unf
71
71
  requirement: !ruby/object:Gem::Requirement