pipely 0.4.4 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c86ce9ade0efc39bc87e6d01085f2dbebd8d4390
4
- data.tar.gz: 5c8b4bbcc6381da109a134dde715dd747813c3f2
3
+ metadata.gz: f66405d01a1c3f031d6f08b65879f011a219d0cb
4
+ data.tar.gz: 0403bab9c88a0ea033b69c93418932026ef6b177
5
5
  SHA512:
6
- metadata.gz: 79ebd0a5df8fc75894a1a946a3084326b524487ec8bfbf2fb19db131335aef40a4b19a2d1f08f75bd1967429c581b5ac2506b32e30e5d0dec80acdaa4bb83a7e
7
- data.tar.gz: e4d05d61888f1a1196acc18f74c8f0168c641f887710572f505502742fdcb1e03c53abe176bbcca81129f7a38506dda15544f9b9a1bafaad0637e72d57ea5fa6
6
+ metadata.gz: 9efa8a80f4185ab7cbd7768a585164366fe0c412667e9564df8c821d7660bc53efb8bb711c69c08aec1b3ebab2308e81b57397ba883e330fa38f7c4b903faf8f
7
+ data.tar.gz: 78415fb04a95d9b6858651c9594fc882614190fb29364c417a7e58b9a776f0b372d3383bfe4cea37b527b66bae8c8f1269fa43735125869d9ebc527837ba7a0e
@@ -8,6 +8,7 @@ module Pipely
8
8
  attr_reader :assets_bucket, :logs_bucket, :steps_bucket
9
9
 
10
10
  START_TIME = "\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}"
11
+ START_DATE = "\#{format(@scheduledStartTime,'YYYY-MM-dd')}"
11
12
 
12
13
  def initialize(options)
13
14
  @assets_bucket = options[:assets]
@@ -36,6 +37,10 @@ module Pipely
36
37
  "s3n://#{@assets_bucket}/#{@s3prefix}/#{START_TIME}"
37
38
  end
38
39
 
40
+ def s3_shared_asset_prefix
41
+ "s3://#{@assets_bucket}/#{@s3prefix}/shared/#{START_DATE}"
42
+ end
43
+
39
44
  def bucket_relative_s3_asset_prefix
40
45
  "#{@s3prefix}/#{START_TIME}"
41
46
  end
@@ -47,6 +52,7 @@ module Pipely
47
52
  :s3n_step_prefix => s3n_step_prefix,
48
53
  :s3_asset_prefix => s3_asset_prefix,
49
54
  :s3n_asset_prefix => s3n_asset_prefix,
55
+ :s3_shared_asset_prefix => s3_shared_asset_prefix,
50
56
  :bucket_relative_s3_asset_prefix => bucket_relative_s3_asset_prefix,
51
57
  }
52
58
  end
@@ -20,6 +20,14 @@ module Pipely
20
20
  def streaming_hadoop_step(options)
21
21
  parts = [ '/home/hadoop/contrib/streaming/hadoop-streaming.jar' ]
22
22
 
23
+ if jars = options[:lib_jars]
24
+ parts += Array(jars).map { |jar| ['-libjars', "#{jar}"] }.flatten
25
+ end
26
+
27
+ (options[:defs] || {}).each do |name, value|
28
+ parts += ['-D', "#{name}=#{value}"]
29
+ end
30
+
23
31
  Array(options[:input]).each do |input|
24
32
  parts += [ '-input', s3n_asset_path(input) ]
25
33
  end
@@ -28,6 +36,10 @@ module Pipely
28
36
  parts += ['-output', s3_asset_path(output) ]
29
37
  end
30
38
 
39
+ if options[:outputformat]
40
+ parts += ['-outputformat', options[:outputformat] ]
41
+ end
42
+
31
43
  Array(options[:mapper]).each do |mapper|
32
44
  parts += ['-mapper', s3n_step_path(mapper) ]
33
45
  end
@@ -18,13 +18,23 @@ module Pipely
18
18
  @data_pipelines = Fog::AWS::DataPipeline.new
19
19
  end
20
20
 
21
- def deploy_pipeline(pipeline_name, definition)
21
+ def deploy_pipeline(pipeline_basename, definition)
22
+ pipeline_name = [
23
+ ('P' if ENV['env'] == 'production'),
24
+ ENV['USER'],
25
+ pipeline_basename
26
+ ].compact.join(':')
27
+
28
+ tags = { "basename" => pipeline_basename }
29
+
22
30
  # Get a list of all existing pipelines
23
31
  pipeline_ids = existing_pipelines(pipeline_name)
24
32
  @log.info("#{pipeline_ids.count} existing pipelines: #{pipeline_ids}")
25
33
 
26
34
  # Create new pipeline
27
- created_pipeline_id = create_pipeline(pipeline_name, definition)
35
+ created_pipeline_id = create_pipeline(pipeline_name,
36
+ definition,
37
+ tags)
28
38
  @log.info("Created pipeline id '#{created_pipeline_id}'")
29
39
 
30
40
  # Delete old pipelines
@@ -54,14 +64,15 @@ module Pipely
54
64
  ids
55
65
  end
56
66
 
57
- def create_pipeline(pipeline_name, definition)
67
+ def create_pipeline(pipeline_name, definition, tags={})
58
68
  definition_objects = JSON.parse(definition)['objects']
59
69
 
60
70
  unique_id = UUIDTools::UUID.random_create
61
71
 
62
72
  created_pipeline = @data_pipelines.pipelines.create(
63
73
  unique_id: unique_id,
64
- name: pipeline_name
74
+ name: pipeline_name,
75
+ tags: default_tags.merge(tags)
65
76
  )
66
77
 
67
78
  created_pipeline.put(definition_objects)
@@ -74,6 +85,15 @@ module Pipely
74
85
  @data_pipelines.pipelines.get(pipeline_id).destroy
75
86
  end
76
87
 
88
+ private
89
+
90
+ def default_tags
91
+ {
92
+ "environment" => ENV['env'],
93
+ "creator" => ENV['USER']
94
+ }
95
+ end
96
+
77
97
  end
78
98
  end
79
99
  end
@@ -1,3 +1,3 @@
1
1
  module Pipely
2
- VERSION = "0.4.4" unless defined?(::Pipely::VERSION)
2
+ VERSION = "0.4.8" unless defined?(::Pipely::VERSION)
3
3
  end
@@ -31,6 +31,10 @@ describe Pipely::Build::S3PathBuilder do
31
31
  should eq("s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
32
32
  }
33
33
 
34
+ its(:s3_shared_asset_prefix) {
35
+ should eq("s3://asset-bucket/run-prefix/shared/\#{format(@scheduledStartTime,'YYYY-MM-dd')}")
36
+ }
37
+
34
38
  describe "#to_hash" do
35
39
  it 'includes the necessary keys for supplying config to a Template' do
36
40
  expect(subject.to_hash.keys).to match_array([
@@ -39,6 +43,7 @@ describe Pipely::Build::S3PathBuilder do
39
43
  :s3n_step_prefix,
40
44
  :s3_asset_prefix,
41
45
  :s3n_asset_prefix,
46
+ :s3_shared_asset_prefix,
42
47
  :bucket_relative_s3_asset_prefix,
43
48
  ])
44
49
  end
@@ -68,6 +68,20 @@ describe Pipely::Build::Template do
68
68
  end
69
69
  end
70
70
 
71
+ context "given an outputformat" do
72
+ it 'points to the outputformat class (not as an S3 URL)' do
73
+ step = subject.streaming_hadoop_step(
74
+ :input => '/input_dir/',
75
+ :output => '/output_dir/',
76
+ :outputformat => 'com.swipely.foo.outputformat',
77
+ :mapper => '/mapper.rb',
78
+ :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
79
+ )
80
+
81
+ expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-outputformat,com.swipely.foo.outputformat,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
82
+ end
83
+ end
84
+
71
85
  context "given the IdentityReducer" do
72
86
  it 'points to the IdentityReducer correctly (not as an S3 URL)' do
73
87
  step = subject.streaming_hadoop_step(
@@ -80,6 +94,34 @@ describe Pipely::Build::Template do
80
94
  expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
81
95
  end
82
96
  end
97
+
98
+ context "given jar files" do
99
+ it 'loads the file correctly' do
100
+ step = subject.streaming_hadoop_step(
101
+ :input => '/input_dir/',
102
+ :output => '/output_dir/',
103
+ :mapper => '/mapper.rb',
104
+ :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer',
105
+ :lib_jars => [ 'filter.jar', 'filter2.jar' ],
106
+ )
107
+
108
+ expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-libjars,filter.jar,-libjars,filter2.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
109
+ end
110
+ end
111
+
112
+ context "given variables" do
113
+ it 'defines them correctly' do
114
+ step = subject.streaming_hadoop_step(
115
+ :input => '/input_dir/',
116
+ :output => '/output_dir/',
117
+ :mapper => '/mapper.rb',
118
+ :reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer',
119
+ :defs => {'name' => 'value'}
120
+ )
121
+
122
+ expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-D,name=value,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
123
+ end
124
+ end
83
125
  end
84
126
 
85
127
  end
@@ -6,7 +6,7 @@ describe Pipely::Deploy::Client do
6
6
  describe "#deploy_pipeline" do
7
7
  let(:existing_pipeline_ids) { ["pipeline-one", "pipeline-two"] }
8
8
  let(:new_pipeline_id) { "pipeline-three" }
9
- let(:pipeline_name) { "MyPipeline" }
9
+ let(:pipeline_basename) { "MyPipeline" }
10
10
  let(:definition) { "pipeline json" }
11
11
 
12
12
  it "gets a list of pipelines, creates a new one, and deletes the others" do
@@ -14,14 +14,17 @@ describe Pipely::Deploy::Client do
14
14
  and_return(existing_pipeline_ids)
15
15
 
16
16
  subject.should_receive(:create_pipeline).
17
- with(pipeline_name, anything()).
17
+ with("#{ENV['USER']}:#{pipeline_basename}",
18
+ anything(),
19
+ hash_including( 'basename' => pipeline_basename )
20
+ ).
18
21
  and_return(new_pipeline_id)
19
22
 
20
23
  existing_pipeline_ids.each do |id|
21
24
  subject.should_receive(:delete_pipeline).with(id)
22
25
  end
23
26
 
24
- subject.deploy_pipeline(pipeline_name, definition)
27
+ subject.deploy_pipeline(pipeline_basename, definition)
25
28
  end
26
29
  end
27
30
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pipely
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.4.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Gillooly
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-03 00:00:00.000000000 Z
11
+ date: 2014-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-graphviz
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: 1.21.0
61
+ version: 1.22.0
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: 1.21.0
68
+ version: 1.22.0
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: unf
71
71
  requirement: !ruby/object:Gem::Requirement