aw_datapipe 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fe971d0b5fa7e5a8558d43e7e0940e634d3bbfeb
4
- data.tar.gz: 9ccb16f29ef04f12ff5fdbc23d042e86d762393a
3
+ metadata.gz: c7db5771c39e741d11c07927b1702e5c0db4372e
4
+ data.tar.gz: 8e005911037c8bc04e00715cdc41dc49c068c7ff
5
5
  SHA512:
6
- metadata.gz: c26e3f06941dabdda94fa17d8dabf0edd623820b7ce8579efd7f78f51ef32c9e2f085a68c7fd022b41a11431d7ca8fc026a2838e28ec92711bcda06f85293c38
7
- data.tar.gz: d7758fde384e2d782805b0b9b0c3fb2e0c3094f31a6d4d2863ac8e49664f211567054fd35e76cb260c68e139998dbdfef9f12ab69438b6180f345aface21d2f5
6
+ metadata.gz: e84ae673cb43f5819d231834e8ffd66b538ed16c50c8e82f5cac8673ae5655fa66adabc1efd45246f2477e58c3d6b4d2893b83fa61de072b474eed4416e958d4
7
+ data.tar.gz: 21740c23b42b4b01143827274fb1e7ec3508be1739cd8a0f39d14783a52007674b2c632c06c1d095169932bbe4fc09e94e51bc14556ee61f04b764833bab4a74
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.2.2
5
+ before_install: gem install bundler -v 1.14
@@ -0,0 +1,21 @@
1
+ # CHANGELOG
2
+
3
+ ## TODO
4
+ - Add DSL.
5
+ - Add Thor based utility instead of console for downloads.
6
+ - Generate separate SQL script files.
7
+ - Codecov.
8
+ - CodeClimate.
9
+ - Rubydocs.
10
+ - AWS labs examples converted to DSL.
11
+
12
+ ## 0.2.0
13
+ - [FIX] Handle array values, e.g. 'securityGroups' => ['group1', 'group2']
14
+ - Separate SourceWriter class.
15
+ - Simplify build vs. new.
16
+ - Default id value based on pipeline object name.
17
+ - Unit tests, e.g. PipelineSerializer.
18
+ - Travis.
19
+
20
+ ## 0.1.0 - 2017-05-03
21
+ - Proof of concept: get and put pipeline definitions, marshal and unmarshal them to and from Ruby objects, and generate working update script output.
data/README.md CHANGED
@@ -1,4 +1,6 @@
1
1
  # AW Datapipe
2
+ [![Gem Version](https://badge.fury.io/rb/aw_datapipe.png)](https://badge.fury.io/rb/aw_datapipe)
3
+ [![Build Status](https://travis-ci.org/varyonic/aw_datapipe.png?branch=master)](https://travis-ci.org/varyonic/aw_datapipe)
2
4
 
3
5
  AW Datapipe is an unofficial domain-specific Ruby wrapper for the
4
6
  [AWS SDK](http://www.rubydoc.info/github/aws/aws-sdk-ruby) Data Pipeline API.
@@ -30,8 +32,9 @@ Configure credentials for AWS SDK.
30
32
  export AWS_ACCESS_KEY_ID=AKIA****************
31
33
  export AWS_SECRET_ACCESS_KEY=********************************
32
34
  ```
33
- Use bin/console to download a pipeline definition as ruby instead of JSON.
35
+ Use a Ruby console (e.g. irb) to download a pipeline definition as Ruby code instead of JSON.
34
36
  ```ruby
37
+ require 'aw_datapipe'
35
38
  pipelines = AwDatapipe::Session.new
36
39
  pipelines.download_definition 'df-***************', 'tmp/pipeline-definition.rb'
37
40
  ```
@@ -43,7 +46,7 @@ bundle exec ruby tmp/pipeline-definition.rb
43
46
  ```
44
47
  ## Development
45
48
 
46
- A live AWS account with a sample pipeline is required to run the tests.
49
+ A live AWS account with a sample pipeline is required to run the remote tests.
47
50
 
48
51
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
49
52
 
@@ -53,7 +56,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
53
56
 
54
57
  Bug reports and pull requests are welcome on GitHub at https://github.com/varyonic/aw_datapipe.
55
58
 
56
-
57
59
  ## License
58
60
 
59
61
  The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile CHANGED
@@ -1,10 +1,22 @@
1
1
  require "bundler/gem_tasks"
2
2
  require "rake/testtask"
3
3
 
4
- Rake::TestTask.new(:test) do |t|
5
- t.libs << "test"
6
- t.libs << "lib"
7
- t.test_files = FileList['test/**/*_test.rb']
8
- end
4
+ desc "Run the unit test suite"
5
+ task :default => 'test:units'
6
+ task :test => 'test:units'
7
+
8
+ namespace :test do
9
+ Rake::TestTask.new(:units) do |t|
10
+ t.pattern = 'test/unit/**/*_test.rb'
11
+ t.ruby_opts << '-rubygems -w'
12
+ t.libs << 'test'
13
+ t.verbose = true
14
+ end
9
15
 
10
- task :default => :test
16
+ Rake::TestTask.new(:remote) do |t|
17
+ t.pattern = 'test/remote/**/*_test.rb'
18
+ t.ruby_opts << '-rubygems -w'
19
+ t.libs << 'test'
20
+ t.verbose = true
21
+ end
22
+ end
@@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
23
23
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
24
24
  spec.require_paths = ["lib"]
25
25
 
26
- spec.add_dependency("activesupport")
26
+ spec.add_dependency("activesupport", ">= 3")
27
27
  spec.add_dependency("aws-sdk", ['~> 2'])
28
28
 
29
29
  spec.add_development_dependency "bundler", "~> 1.14"
@@ -1,3 +1,4 @@
1
+ require 'active_support/core_ext/module/delegation'
1
2
  require 'active_support/inflector' # String#underscore
2
3
  require 'aws-sdk'
3
4
 
@@ -7,4 +8,5 @@ require 'aw_datapipe/pipeline_object'
7
8
  require 'aw_datapipe/pipeline_serializer'
8
9
  require 'aw_datapipe/object_hash'
9
10
  require 'aw_datapipe/session'
11
+ require 'aw_datapipe/source_writer'
10
12
  require 'aw_datapipe/version'
@@ -13,13 +13,5 @@ module AwDatapipe
13
13
  self
14
14
  end
15
15
  alias_method :<<, :append
16
-
17
- def append_with_dependents(*objects)
18
- objects.each do |object|
19
- object.dependencies.each { |dep| self.append dep }
20
- self.append object
21
- end
22
- self
23
- end
24
16
  end
25
17
  end
@@ -15,7 +15,8 @@ module AwDatapipe
15
15
  end
16
16
 
17
17
  def self.build(config, activities, parameter_metadata, parameter_values)
18
- new([], parameter_metadata, parameter_values).tap { |p| p.objects.append_with_dependents(config, *activities) }
18
+ objects = [config, *activities].map { |obj| obj.dependencies.append(obj) }.flatten
19
+ new(objects, parameter_metadata, parameter_values)
19
20
  end
20
21
 
21
22
  def configuration
@@ -32,58 +33,8 @@ module AwDatapipe
32
33
  objects.values.map(&:dependencies).flatten.uniq
33
34
  end
34
35
 
35
- def source
36
- [
37
- header_source,
38
- objects_source,
39
- parameter_metadata_source,
40
- parameter_values_source,
41
- footer_source
42
- ].join("\n")
43
- end
44
-
45
36
  def write_source(pathname)
46
- File.write(pathname, source)
47
- end
48
-
49
- protected
50
-
51
- def footer_source
52
- <<-EOF
53
- pipelines = AwDatapipe::Session.new
54
- pipeline = AwDatapipe::Pipeline.build(default, activities, parameter_metadata, parameter_values)
55
- pipeline.id = "#{id}"
56
- pipelines.save(pipeline)
57
- EOF
58
- end
59
-
60
- def header_source
61
- <<-EOF
62
- # Generated by aw_datapipe download_definition of #{id}
63
- require 'aw_datapipe'
64
- EOF
65
- end
66
-
67
- def objects_source
68
- object_ids = objects.keys
69
- unreferenced_object_ids = object_ids - referenced_object_ids
70
-
71
- s = referenced_object_ids.map { |id| "#{id} = #{objects[id].source}" }.join("\n\n")
72
- s << "\n\nactivities = [\n"
73
- s << unreferenced_object_ids.map { |id| " #{objects[id].source(2)}" }.join(",\n")
74
- s << "\n]"
75
- end
76
-
77
- def parameter_values_source
78
- "parameter_values = {\n " << parameter_values.sort.map do |key, value|
79
- "\"#{key}\" => #{value.inspect}"
80
- end.join(",\n ") << "\n}\n"
81
- end
82
-
83
- def parameter_metadata_source
84
- "parameter_metadata = {\n " << parameter_metadata.sort.map do |key, value|
85
- "\"#{key}\" => #{value.source}"
86
- end.join(",\n ") << "\n}\n"
37
+ SourceWriter.call(self, pathname)
87
38
  end
88
39
  end
89
40
  end
@@ -1,17 +1,23 @@
1
1
  module AwDatapipe
2
2
  class PipelineObject < Struct
3
+ attr_reader :id
4
+ attr_accessor :name
3
5
  attr_accessor :pipeline
4
6
 
5
7
  def self.build(params)
6
- new.tap do |struct|
7
- params.each_pair { |k, v| struct.send "#{k}=", v }
8
- end
8
+ new(params)
9
+ end
10
+
11
+ def initialize(params)
12
+ @id = params.delete(:id) || params[:name].underscore.to_sym ||
13
+ raise(ArgumentError, ":id or :name required: #{params.inspect}")
14
+ params.each_pair { |k, v| send "#{k}=", v }
9
15
  end
10
16
 
11
17
  # Iterates through struct members, recursively collecting any PipelineObjects.
12
18
  # Recursion ensures dependencies sorted before dependents.
13
19
  def dependencies
14
- (members - [:id]).each_with_object([]) do |attr_name, depends|
20
+ members.each_with_object([]) do |attr_name, depends|
15
21
  value = send(attr_name)
16
22
  value = pipeline.objects.fetch(value) if value.is_a?(Symbol)
17
23
  depends << value.dependencies << value if value.is_a?(PipelineObject)
@@ -22,14 +28,18 @@ module AwDatapipe
22
28
  self.class.name.split('::').last
23
29
  end
24
30
 
31
+ def inspect
32
+ "#<#{demodulized_class_name} #{to_hash}>"
33
+ end
34
+
25
35
  def to_hash
26
- Hash[each_pair.to_a]
36
+ Hash[each_pair.to_a].merge(id: id, name: name)
27
37
  end
28
38
 
29
39
  def source(indent_level = 1)
30
- "#{self.class.name}.build(\n" << indent(indent_level) << members.map do |member|
31
- member_source(member)
32
- end.join(",\n" << indent(indent_level)) << ")"
40
+ "#{self.class.name}.build(" << [:id, :name, *members].map do |member|
41
+ "\n" << indent(indent_level) << member_source(member)
42
+ end.join(",") << ")"
33
43
  end
34
44
 
35
45
  protected
@@ -46,18 +56,18 @@ module AwDatapipe
46
56
  end
47
57
  end
48
58
 
49
- Configuration = PipelineObject.new(:failure_and_rerun_mode, :id, :name, :pipeline_log_uri, :resource_role, :role, :schedule, :schedule_type)
50
- Schedule = PipelineObject.new(:id, :name, :period, :start_date_time)
59
+ Configuration = PipelineObject.new(:failure_and_rerun_mode, :pipeline_log_uri, :resource_role, :role, :schedule, :schedule_type)
60
+ Schedule = PipelineObject.new(:period, :start_date_time)
51
61
 
52
- Ec2Resource = PipelineObject.new(:action_on_task_failure, :id, :instance_type, :name, :security_group_ids, :subnet_id, :terminate_after)
53
- S3DataNode = PipelineObject.new(:directory_path, :id, :name)
62
+ Ec2Resource = PipelineObject.new(:action_on_task_failure, :instance_type, :security_group_ids, :subnet_id, :terminate_after)
63
+ S3DataNode = PipelineObject.new(:directory_path)
54
64
 
55
- JdbcDatabase = PipelineObject.new(:_password, :connection_string, :id, :jdbc_driver_class, :name, :username)
56
- SqlDataNode = PipelineObject.new(:database, :id, :name, :select_query, :table)
57
- CopyActivity = PipelineObject.new(:id, :input, :name, :output, :runs_on)
65
+ JdbcDatabase = PipelineObject.new(:_password, :connection_string, :jdbc_driver_class, :username)
66
+ SqlDataNode = PipelineObject.new(:database, :select_query, :table)
67
+ CopyActivity = PipelineObject.new(:input, :output, :runs_on)
58
68
 
59
- RedshiftDatabase = PipelineObject.new(:_password, :connection_string, :database_name, :id, :name, :username)
60
- RedshiftDataNode = PipelineObject.new(:create_table_sql, :database, :id, :name, :primary_keys, :schema_name, :table_name)
61
- RedshiftCopyActivity = PipelineObject.new(:id, :input, :insert_mode, :name, :output, :runs_on)
69
+ RedshiftDatabase = PipelineObject.new(:_password, :connection_string, :database_name, :username)
70
+ RedshiftDataNode = PipelineObject.new(:create_table_sql, :database, :primary_keys, :schema_name, :table_name)
71
+ RedshiftCopyActivity = PipelineObject.new(:input, :insert_mode, :output, :runs_on)
62
72
 
63
73
  end
@@ -12,9 +12,9 @@ module AwDatapipe
12
12
 
13
13
  def unmarshal(aws_definition)
14
14
  # pipeline.aws_definition = aws_definition # for troubleshooting
15
- objects = unmarshal_pipeline_objects(aws_definition)
16
- parameter_metadata = unmarshal_parameter_objects(aws_definition)
17
- parameter_values = unmarshal_parameter_values(aws_definition)
15
+ objects = unmarshal_pipeline_objects(aws_definition.pipeline_objects)
16
+ parameter_metadata = unmarshal_parameter_objects(aws_definition.parameter_objects)
17
+ parameter_values = unmarshal_parameter_values(aws_definition.parameter_values)
18
18
 
19
19
  Pipeline.new(objects, parameter_metadata, parameter_values)
20
20
  end
@@ -43,8 +43,8 @@ module AwDatapipe
43
43
  end
44
44
 
45
45
  # @return Array PipelineObject subclass instance.
46
- def unmarshal_pipeline_objects(aws_definition)
47
- aws_definition.pipeline_objects.map do |aws_struct|
46
+ def unmarshal_pipeline_objects(pipeline_objects)
47
+ pipeline_objects.map do |aws_struct|
48
48
  unmarshal_pipeline_object(aws_struct)
49
49
  end
50
50
  end
@@ -57,23 +57,33 @@ module AwDatapipe
57
57
 
58
58
  klass = AwDatapipe.const_defined?(type, false) ?
59
59
  AwDatapipe.const_get(type, false) :
60
- AwDatapipe.const_set(type, PipelineObject.new(*attributes.keys.sort))
60
+ AwDatapipe.const_set(type, PipelineObject.new(*(attributes.keys - [:id, :name])))
61
61
 
62
- klass.new(*attributes.sort.map(&:last)) # pass values sorted by keys
62
+ klass.new(attributes)
63
63
  end
64
64
 
65
65
  def fields_to_hash(fields)
66
66
  fields.each_with_object({}) do |field, hash|
67
- hash[symbolize field.key] = field.string_value || field.ref_value.underscore.to_sym
67
+ if hash[symbolize field.key]
68
+ hash[symbolize field.key] = Array(hash[symbolize field.key]) << field.string_value
69
+ else
70
+ hash[symbolize field.key] = field.string_value || field.ref_value.underscore.to_sym
71
+ end
68
72
  end
69
73
  end
70
74
 
71
75
  def hash_to_fields(hash)
72
- hash.keys.map do |key|
73
- PipelineObject === hash[key] ?
74
- { key: camelize(key, :lower), ref_value: camelize(hash[key].id) } :
75
- { key: camelize(key, :lower), string_value: hash[key] }
76
- end
76
+ hash.each_pair.map do |key, value|
77
+ camelized_key = camelize(key, :lower)
78
+ case value
79
+ when PipelineObject
80
+ { key: camelized_key, ref_value: camelize(value.id) }
81
+ when Array
82
+ value.map { |v| Hash[key: camelized_key, string_value: v] }
83
+ else
84
+ { key: camelized_key, string_value: value }
85
+ end
86
+ end.flatten
77
87
  end
78
88
 
79
89
  # Convert string to a rubyish variable name.
@@ -98,8 +108,8 @@ module AwDatapipe
98
108
  { id: key, attributes: out }
99
109
  end
100
110
 
101
- def unmarshal_parameter_objects(aws_definition)
102
- aws_definition.parameter_objects.each_with_object({}) do |object, hash|
111
+ def unmarshal_parameter_objects(parameter_objects)
112
+ parameter_objects.each_with_object({}) do |object, hash|
103
113
  klass = ParameterMetadata.new(*object.attributes.map(&:key).map(&:to_sym))
104
114
  hash[object.id] = object.attributes.each_with_object(klass.new) do |attribute, struct|
105
115
  struct.send "#{attribute.key}=", attribute.string_value
@@ -110,14 +120,22 @@ module AwDatapipe
110
120
  def marshal_parameter_values(parameter_values)
111
121
  out = []
112
122
  parameter_values.each_pair do |id, value|
113
- out << { id: id, string_value: value }
123
+ if value.respond_to?(:each)
124
+ value.each { |v| out << { id: id, string_value: v } }
125
+ else
126
+ out << { id: id, string_value: value }
127
+ end
114
128
  end
115
129
  out
116
130
  end
117
131
 
118
- def unmarshal_parameter_values(aws_definition)
119
- aws_definition.parameter_values.each_with_object({}) do |value, hash|
120
- hash[value.id] = value.string_value
132
+ def unmarshal_parameter_values(parameter_values)
133
+ parameter_values.each_with_object({}) do |value, hash|
134
+ if hash[value.id]
135
+ hash[value.id] = Array(hash[value.id]) << value.string_value
136
+ else
137
+ hash[value.id] = value.string_value
138
+ end
121
139
  end
122
140
  end
123
141
  end
@@ -0,0 +1,68 @@
1
+ module AwDatapipe
2
+ # Writes a Pipeline definition into a ruby script.
3
+ class SourceWriter
4
+ def self.call(pipeline, pathname)
5
+ new(pipeline).call(pathname)
6
+ end
7
+
8
+ attr_reader :pipeline
9
+ delegate :objects, :parameter_values, :parameter_metadata, :referenced_object_ids, to: :pipeline
10
+
11
+ def initialize(pipeline)
12
+ @pipeline = pipeline
13
+ end
14
+
15
+ def call(pathname)
16
+ File.write(pathname, source)
17
+ end
18
+
19
+ def source
20
+ [
21
+ header_source,
22
+ objects_source,
23
+ parameter_metadata_source,
24
+ parameter_values_source,
25
+ footer_source
26
+ ].join("\n")
27
+ end
28
+
29
+ protected
30
+
31
+ def header_source
32
+ <<-EOF
33
+ # Generated by aw_datapipe download_definition of #{pipeline.id}
34
+ require 'aw_datapipe'
35
+ EOF
36
+ end
37
+
38
+ def objects_source
39
+ object_ids = objects.keys
40
+ unreferenced_object_ids = object_ids - referenced_object_ids
41
+ s = referenced_object_ids.map { |id| "#{id} = #{objects[id].source}" }.join("\n\n")
42
+ s << "\n\nactivities = [\n"
43
+ s << unreferenced_object_ids.map { |id| " #{objects[id].source(2)}" }.join(",\n")
44
+ s << "\n]"
45
+ end
46
+
47
+ def parameter_metadata_source
48
+ "parameter_metadata = {\n " << parameter_metadata.sort.map do |key, value|
49
+ "\"#{key}\" => #{value.source}"
50
+ end.join(",\n ") << "\n}\n"
51
+ end
52
+
53
+ def parameter_values_source
54
+ "parameter_values = {\n " << parameter_values.sort.map do |key, value|
55
+ "\"#{key}\" => #{value.inspect}"
56
+ end.join(",\n ") << "\n}\n"
57
+ end
58
+
59
+ def footer_source
60
+ <<-EOF
61
+ pipelines = AwDatapipe::Session.new
62
+ pipeline = AwDatapipe::Pipeline.build(default, activities, parameter_metadata, parameter_values)
63
+ pipeline.id = "#{pipeline.id}"
64
+ pipelines.save(pipeline)
65
+ EOF
66
+ end
67
+ end
68
+ end
@@ -1,3 +1,3 @@
1
1
  module AwDatapipe
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aw_datapipe
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Piers Chambers
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: aws-sdk
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -89,6 +89,8 @@ extensions: []
89
89
  extra_rdoc_files: []
90
90
  files:
91
91
  - ".gitignore"
92
+ - ".travis.yml"
93
+ - CHANGELOG.md
92
94
  - Gemfile
93
95
  - LICENSE.txt
94
96
  - README.md
@@ -103,6 +105,7 @@ files:
103
105
  - lib/aw_datapipe/pipeline_object.rb
104
106
  - lib/aw_datapipe/pipeline_serializer.rb
105
107
  - lib/aw_datapipe/session.rb
108
+ - lib/aw_datapipe/source_writer.rb
106
109
  - lib/aw_datapipe/version.rb
107
110
  homepage: http://github.com/varyonic/aw_datapipe
108
111
  licenses: