pipely 0.4.4 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/pipely/build/s3_path_builder.rb +6 -0
- data/lib/pipely/build/template_helpers.rb +12 -0
- data/lib/pipely/deploy/client.rb +24 -4
- data/lib/pipely/version.rb +1 -1
- data/spec/lib/pipely/build/s3_path_builder_spec.rb +5 -0
- data/spec/lib/pipely/build/template_spec.rb +42 -0
- data/spec/lib/pipely/deploy/client_spec.rb +6 -3
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f66405d01a1c3f031d6f08b65879f011a219d0cb
|
4
|
+
data.tar.gz: 0403bab9c88a0ea033b69c93418932026ef6b177
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9efa8a80f4185ab7cbd7768a585164366fe0c412667e9564df8c821d7660bc53efb8bb711c69c08aec1b3ebab2308e81b57397ba883e330fa38f7c4b903faf8f
|
7
|
+
data.tar.gz: 78415fb04a95d9b6858651c9594fc882614190fb29364c417a7e58b9a776f0b372d3383bfe4cea37b527b66bae8c8f1269fa43735125869d9ebc527837ba7a0e
|
data/lib/pipely/build/s3_path_builder.rb
CHANGED
@@ -8,6 +8,7 @@ module Pipely
|
|
8
8
|
attr_reader :assets_bucket, :logs_bucket, :steps_bucket
|
9
9
|
|
10
10
|
START_TIME = "\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}"
|
11
|
+
START_DATE = "\#{format(@scheduledStartTime,'YYYY-MM-dd')}"
|
11
12
|
|
12
13
|
def initialize(options)
|
13
14
|
@assets_bucket = options[:assets]
|
@@ -36,6 +37,10 @@ module Pipely
|
|
36
37
|
"s3n://#{@assets_bucket}/#{@s3prefix}/#{START_TIME}"
|
37
38
|
end
|
38
39
|
|
40
|
+
def s3_shared_asset_prefix
|
41
|
+
"s3://#{@assets_bucket}/#{@s3prefix}/shared/#{START_DATE}"
|
42
|
+
end
|
43
|
+
|
39
44
|
def bucket_relative_s3_asset_prefix
|
40
45
|
"#{@s3prefix}/#{START_TIME}"
|
41
46
|
end
|
@@ -47,6 +52,7 @@ module Pipely
|
|
47
52
|
:s3n_step_prefix => s3n_step_prefix,
|
48
53
|
:s3_asset_prefix => s3_asset_prefix,
|
49
54
|
:s3n_asset_prefix => s3n_asset_prefix,
|
55
|
+
:s3_shared_asset_prefix => s3_shared_asset_prefix,
|
50
56
|
:bucket_relative_s3_asset_prefix => bucket_relative_s3_asset_prefix,
|
51
57
|
}
|
52
58
|
end
|
data/lib/pipely/build/template_helpers.rb
CHANGED
@@ -20,6 +20,14 @@ module Pipely
|
|
20
20
|
def streaming_hadoop_step(options)
|
21
21
|
parts = [ '/home/hadoop/contrib/streaming/hadoop-streaming.jar' ]
|
22
22
|
|
23
|
+
if jars = options[:lib_jars]
|
24
|
+
parts += Array(jars).map { |jar| ['-libjars', "#{jar}"] }.flatten
|
25
|
+
end
|
26
|
+
|
27
|
+
(options[:defs] || {}).each do |name, value|
|
28
|
+
parts += ['-D', "#{name}=#{value}"]
|
29
|
+
end
|
30
|
+
|
23
31
|
Array(options[:input]).each do |input|
|
24
32
|
parts += [ '-input', s3n_asset_path(input) ]
|
25
33
|
end
|
@@ -28,6 +36,10 @@ module Pipely
|
|
28
36
|
parts += ['-output', s3_asset_path(output) ]
|
29
37
|
end
|
30
38
|
|
39
|
+
if options[:outputformat]
|
40
|
+
parts += ['-outputformat', options[:outputformat] ]
|
41
|
+
end
|
42
|
+
|
31
43
|
Array(options[:mapper]).each do |mapper|
|
32
44
|
parts += ['-mapper', s3n_step_path(mapper) ]
|
33
45
|
end
|
data/lib/pipely/deploy/client.rb
CHANGED
@@ -18,13 +18,23 @@ module Pipely
|
|
18
18
|
@data_pipelines = Fog::AWS::DataPipeline.new
|
19
19
|
end
|
20
20
|
|
21
|
-
def deploy_pipeline(
|
21
|
+
def deploy_pipeline(pipeline_basename, definition)
|
22
|
+
pipeline_name = [
|
23
|
+
('P' if ENV['env'] == 'production'),
|
24
|
+
ENV['USER'],
|
25
|
+
pipeline_basename
|
26
|
+
].compact.join(':')
|
27
|
+
|
28
|
+
tags = { "basename" => pipeline_basename }
|
29
|
+
|
22
30
|
# Get a list of all existing pipelines
|
23
31
|
pipeline_ids = existing_pipelines(pipeline_name)
|
24
32
|
@log.info("#{pipeline_ids.count} existing pipelines: #{pipeline_ids}")
|
25
33
|
|
26
34
|
# Create new pipeline
|
27
|
-
created_pipeline_id = create_pipeline(pipeline_name,
|
35
|
+
created_pipeline_id = create_pipeline(pipeline_name,
|
36
|
+
definition,
|
37
|
+
tags)
|
28
38
|
@log.info("Created pipeline id '#{created_pipeline_id}'")
|
29
39
|
|
30
40
|
# Delete old pipelines
|
@@ -54,14 +64,15 @@ module Pipely
|
|
54
64
|
ids
|
55
65
|
end
|
56
66
|
|
57
|
-
def create_pipeline(pipeline_name, definition)
|
67
|
+
def create_pipeline(pipeline_name, definition, tags={})
|
58
68
|
definition_objects = JSON.parse(definition)['objects']
|
59
69
|
|
60
70
|
unique_id = UUIDTools::UUID.random_create
|
61
71
|
|
62
72
|
created_pipeline = @data_pipelines.pipelines.create(
|
63
73
|
unique_id: unique_id,
|
64
|
-
name: pipeline_name
|
74
|
+
name: pipeline_name,
|
75
|
+
tags: default_tags.merge(tags)
|
65
76
|
)
|
66
77
|
|
67
78
|
created_pipeline.put(definition_objects)
|
@@ -74,6 +85,15 @@ module Pipely
|
|
74
85
|
@data_pipelines.pipelines.get(pipeline_id).destroy
|
75
86
|
end
|
76
87
|
|
88
|
+
private
|
89
|
+
|
90
|
+
def default_tags
|
91
|
+
{
|
92
|
+
"environment" => ENV['env'],
|
93
|
+
"creator" => ENV['USER']
|
94
|
+
}
|
95
|
+
end
|
96
|
+
|
77
97
|
end
|
78
98
|
end
|
79
99
|
end
|
data/lib/pipely/version.rb
CHANGED
data/spec/lib/pipely/build/s3_path_builder_spec.rb
CHANGED
@@ -31,6 +31,10 @@ describe Pipely::Build::S3PathBuilder do
|
|
31
31
|
should eq("s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}")
|
32
32
|
}
|
33
33
|
|
34
|
+
its(:s3_shared_asset_prefix) {
|
35
|
+
should eq("s3://asset-bucket/run-prefix/shared/\#{format(@scheduledStartTime,'YYYY-MM-dd')}")
|
36
|
+
}
|
37
|
+
|
34
38
|
describe "#to_hash" do
|
35
39
|
it 'includes the necessary keys for supplying config to a Template' do
|
36
40
|
expect(subject.to_hash.keys).to match_array([
|
@@ -39,6 +43,7 @@ describe Pipely::Build::S3PathBuilder do
|
|
39
43
|
:s3n_step_prefix,
|
40
44
|
:s3_asset_prefix,
|
41
45
|
:s3n_asset_prefix,
|
46
|
+
:s3_shared_asset_prefix,
|
42
47
|
:bucket_relative_s3_asset_prefix,
|
43
48
|
])
|
44
49
|
end
|
data/spec/lib/pipely/build/template_spec.rb
CHANGED
@@ -68,6 +68,20 @@ describe Pipely::Build::Template do
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
+
context "given an outputformat" do
|
72
|
+
it 'points to the outputformat class (not as an S3 URL)' do
|
73
|
+
step = subject.streaming_hadoop_step(
|
74
|
+
:input => '/input_dir/',
|
75
|
+
:output => '/output_dir/',
|
76
|
+
:outputformat => 'com.swipely.foo.outputformat',
|
77
|
+
:mapper => '/mapper.rb',
|
78
|
+
:reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer'
|
79
|
+
)
|
80
|
+
|
81
|
+
expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-outputformat,com.swipely.foo.outputformat,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
71
85
|
context "given the IdentityReducer" do
|
72
86
|
it 'points to the IdentityReducer correctly (not as an S3 URL)' do
|
73
87
|
step = subject.streaming_hadoop_step(
|
@@ -80,6 +94,34 @@ describe Pipely::Build::Template do
|
|
80
94
|
expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
|
81
95
|
end
|
82
96
|
end
|
97
|
+
|
98
|
+
context "given jar files" do
|
99
|
+
it 'loads the file correctly' do
|
100
|
+
step = subject.streaming_hadoop_step(
|
101
|
+
:input => '/input_dir/',
|
102
|
+
:output => '/output_dir/',
|
103
|
+
:mapper => '/mapper.rb',
|
104
|
+
:reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer',
|
105
|
+
:lib_jars => [ 'filter.jar', 'filter2.jar' ],
|
106
|
+
)
|
107
|
+
|
108
|
+
expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-libjars,filter.jar,-libjars,filter2.jar,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
context "given variables" do
|
113
|
+
it 'defines them correctly' do
|
114
|
+
step = subject.streaming_hadoop_step(
|
115
|
+
:input => '/input_dir/',
|
116
|
+
:output => '/output_dir/',
|
117
|
+
:mapper => '/mapper.rb',
|
118
|
+
:reducer => 'org.apache.hadoop.mapred.lib.IdentityReducer',
|
119
|
+
:defs => {'name' => 'value'}
|
120
|
+
)
|
121
|
+
|
122
|
+
expect(step).to eq("/home/hadoop/contrib/streaming/hadoop-streaming.jar,-D,name=value,-input,s3n://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/input_dir/,-output,s3://asset-bucket/run-prefix/\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/output_dir/,-mapper,s3n://step-bucket/run-prefix/mapper.rb,-reducer,org.apache.hadoop.mapred.lib.IdentityReducer")
|
123
|
+
end
|
124
|
+
end
|
83
125
|
end
|
84
126
|
|
85
127
|
end
|
data/spec/lib/pipely/deploy/client_spec.rb
CHANGED
@@ -6,7 +6,7 @@ describe Pipely::Deploy::Client do
|
|
6
6
|
describe "#deploy_pipeline" do
|
7
7
|
let(:existing_pipeline_ids) { ["pipeline-one", "pipeline-two"] }
|
8
8
|
let(:new_pipeline_id) { "pipeline-three" }
|
9
|
-
let(:
|
9
|
+
let(:pipeline_basename) { "MyPipeline" }
|
10
10
|
let(:definition) { "pipeline json" }
|
11
11
|
|
12
12
|
it "gets a list of pipelines, creates a new one, and deletes the others" do
|
@@ -14,14 +14,17 @@ describe Pipely::Deploy::Client do
|
|
14
14
|
and_return(existing_pipeline_ids)
|
15
15
|
|
16
16
|
subject.should_receive(:create_pipeline).
|
17
|
-
with(
|
17
|
+
with("#{ENV['USER']}:#{pipeline_basename}",
|
18
|
+
anything(),
|
19
|
+
hash_including( 'basename' => pipeline_basename )
|
20
|
+
).
|
18
21
|
and_return(new_pipeline_id)
|
19
22
|
|
20
23
|
existing_pipeline_ids.each do |id|
|
21
24
|
subject.should_receive(:delete_pipeline).with(id)
|
22
25
|
end
|
23
26
|
|
24
|
-
subject.deploy_pipeline(
|
27
|
+
subject.deploy_pipeline(pipeline_basename, definition)
|
25
28
|
end
|
26
29
|
end
|
27
30
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pipely
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.4.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Gillooly
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-graphviz
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 1.
|
61
|
+
version: 1.22.0
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 1.
|
68
|
+
version: 1.22.0
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: unf
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|