aw_datapipe 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fe971d0b5fa7e5a8558d43e7e0940e634d3bbfeb
4
- data.tar.gz: 9ccb16f29ef04f12ff5fdbc23d042e86d762393a
3
+ metadata.gz: c7db5771c39e741d11c07927b1702e5c0db4372e
4
+ data.tar.gz: 8e005911037c8bc04e00715cdc41dc49c068c7ff
5
5
  SHA512:
6
- metadata.gz: c26e3f06941dabdda94fa17d8dabf0edd623820b7ce8579efd7f78f51ef32c9e2f085a68c7fd022b41a11431d7ca8fc026a2838e28ec92711bcda06f85293c38
7
- data.tar.gz: d7758fde384e2d782805b0b9b0c3fb2e0c3094f31a6d4d2863ac8e49664f211567054fd35e76cb260c68e139998dbdfef9f12ab69438b6180f345aface21d2f5
6
+ metadata.gz: e84ae673cb43f5819d231834e8ffd66b538ed16c50c8e82f5cac8673ae5655fa66adabc1efd45246f2477e58c3d6b4d2893b83fa61de072b474eed4416e958d4
7
+ data.tar.gz: 21740c23b42b4b01143827274fb1e7ec3508be1739cd8a0f39d14783a52007674b2c632c06c1d095169932bbe4fc09e94e51bc14556ee61f04b764833bab4a74
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.2.2
5
+ before_install: gem install bundler -v 1.14
@@ -0,0 +1,21 @@
1
+ # CHANGELOG
2
+
3
+ ## TODO
4
+ - Add DSL.
5
+ - Add Thor based utility instead of console for downloads.
6
+ - Generate separate SQL script files.
7
+ - Codecov.
8
+ - CodeClimate.
9
+ - Rubydocs.
10
+ - AWS labs examples converted to DSL.
11
+
12
+ ## 0.2.0
13
+ - [FIX] Handle array values, e.g. 'securityGroups' => ['group1', 'group2']
14
+ - Separate SourceWriter class.
15
+ - Simplify build vs. new.
16
+ - Default id value based on pipeline object name.
17
+ - Unit tests, e.g. PipelineSerializer.
18
+ - Travis.
19
+
20
+ ## 0.1.0 - 2017-05-03
21
+ - Proof of concept: get and put pipeline definitions, marshal and unmarshal into ruby objects and generate working update script output.
data/README.md CHANGED
@@ -1,4 +1,6 @@
1
1
  # AW Datapipe
2
+ [![Gem Version](https://badge.fury.io/rb/aw_datapipe.png)](https://badge.fury.io/rb/aw_datapipe)
3
+ [![Build Status](https://travis-ci.org/varyonic/aw_datapipe.png?branch=master)](https://travis-ci.org/varyonic/aw_datapipe)
2
4
 
3
5
  AW Datapipe is an unofficial domain specific ruby wrapper for the
4
6
  [AWS SDK](http://www.rubydoc.info/github/aws/aws-sdk-ruby) Data Pipeline API.
@@ -30,8 +32,9 @@ Configure credentials for AWS SDK.
30
32
  export AWS_ACCESS_KEY_ID=AKIA****************
31
33
  export AWS_SECRET_ACCESS_KEY=********************************
32
34
  ```
33
- Use bin/console to download a pipeline definition as ruby instead of JSON.
35
+ Use a ruby console (e.g. irb) to download a pipeline definition as ruby instead of JSON.
34
36
  ```ruby
37
+ require 'aw_datapipe'
35
38
  pipelines = AwDatapipe::Session.new
36
39
  pipelines.download_definition 'df-***************', 'tmp/pipeline-definition.rb'
37
40
  ```
@@ -43,7 +46,7 @@ bundle exec ruby tmp/pipeline-definition.rb
43
46
  ```
44
47
  ## Development
45
48
 
46
- A live AWS account with a sample pipeline is required to run the tests.
49
+ A live AWS account with a sample pipeline is required to run the remote tests.
47
50
 
48
51
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
49
52
 
@@ -53,7 +56,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
53
56
 
54
57
  Bug reports and pull requests are welcome on GitHub at https://github.com/varyonic/aw_datapipe.
55
58
 
56
-
57
59
  ## License
58
60
 
59
61
  The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile CHANGED
@@ -1,10 +1,22 @@
1
1
  require "bundler/gem_tasks"
2
2
  require "rake/testtask"
3
3
 
4
- Rake::TestTask.new(:test) do |t|
5
- t.libs << "test"
6
- t.libs << "lib"
7
- t.test_files = FileList['test/**/*_test.rb']
8
- end
4
+ desc "Run the unit test suite"
5
+ task :default => 'test:units'
6
+ task :test => 'test:units'
7
+
8
+ namespace :test do
9
+ Rake::TestTask.new(:units) do |t|
10
+ t.pattern = 'test/unit/**/*_test.rb'
11
+ t.ruby_opts << '-rubygems -w'
12
+ t.libs << 'test'
13
+ t.verbose = true
14
+ end
9
15
 
10
- task :default => :test
16
+ Rake::TestTask.new(:remote) do |t|
17
+ t.pattern = 'test/remote/**/*_test.rb'
18
+ t.ruby_opts << '-rubygems -w'
19
+ t.libs << 'test'
20
+ t.verbose = true
21
+ end
22
+ end
@@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
23
23
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
24
24
  spec.require_paths = ["lib"]
25
25
 
26
- spec.add_dependency("activesupport")
26
+ spec.add_dependency("activesupport", ">= 3")
27
27
  spec.add_dependency("aws-sdk", ['~> 2'])
28
28
 
29
29
  spec.add_development_dependency "bundler", "~> 1.14"
@@ -1,3 +1,4 @@
1
+ require 'active_support/core_ext/module/delegation'
1
2
  require 'active_support/inflector' # String#underscore
2
3
  require 'aws-sdk'
3
4
 
@@ -7,4 +8,5 @@ require 'aw_datapipe/pipeline_object'
7
8
  require 'aw_datapipe/pipeline_serializer'
8
9
  require 'aw_datapipe/object_hash'
9
10
  require 'aw_datapipe/session'
11
+ require 'aw_datapipe/source_writer'
10
12
  require 'aw_datapipe/version'
@@ -13,13 +13,5 @@ module AwDatapipe
13
13
  self
14
14
  end
15
15
  alias_method :<<, :append
16
-
17
- def append_with_dependents(*objects)
18
- objects.each do |object|
19
- object.dependencies.each { |dep| self.append dep }
20
- self.append object
21
- end
22
- self
23
- end
24
16
  end
25
17
  end
@@ -15,7 +15,8 @@ module AwDatapipe
15
15
  end
16
16
 
17
17
  def self.build(config, activities, parameter_metadata, parameter_values)
18
- new([], parameter_metadata, parameter_values).tap { |p| p.objects.append_with_dependents(config, *activities) }
18
+ objects = [config, *activities].map { |obj| obj.dependencies.append(obj) }.flatten
19
+ new(objects, parameter_metadata, parameter_values)
19
20
  end
20
21
 
21
22
  def configuration
@@ -32,58 +33,8 @@ module AwDatapipe
32
33
  objects.values.map(&:dependencies).flatten.uniq
33
34
  end
34
35
 
35
- def source
36
- [
37
- header_source,
38
- objects_source,
39
- parameter_metadata_source,
40
- parameter_values_source,
41
- footer_source
42
- ].join("\n")
43
- end
44
-
45
36
  def write_source(pathname)
46
- File.write(pathname, source)
47
- end
48
-
49
- protected
50
-
51
- def footer_source
52
- <<-EOF
53
- pipelines = AwDatapipe::Session.new
54
- pipeline = AwDatapipe::Pipeline.build(default, activities, parameter_metadata, parameter_values)
55
- pipeline.id = "#{id}"
56
- pipelines.save(pipeline)
57
- EOF
58
- end
59
-
60
- def header_source
61
- <<-EOF
62
- # Generated by aw_datapipe download_definition of #{id}
63
- require 'aw_datapipe'
64
- EOF
65
- end
66
-
67
- def objects_source
68
- object_ids = objects.keys
69
- unreferenced_object_ids = object_ids - referenced_object_ids
70
-
71
- s = referenced_object_ids.map { |id| "#{id} = #{objects[id].source}" }.join("\n\n")
72
- s << "\n\nactivities = [\n"
73
- s << unreferenced_object_ids.map { |id| " #{objects[id].source(2)}" }.join(",\n")
74
- s << "\n]"
75
- end
76
-
77
- def parameter_values_source
78
- "parameter_values = {\n " << parameter_values.sort.map do |key, value|
79
- "\"#{key}\" => #{value.inspect}"
80
- end.join(",\n ") << "\n}\n"
81
- end
82
-
83
- def parameter_metadata_source
84
- "parameter_metadata = {\n " << parameter_metadata.sort.map do |key, value|
85
- "\"#{key}\" => #{value.source}"
86
- end.join(",\n ") << "\n}\n"
37
+ SourceWriter.call(self, pathname)
87
38
  end
88
39
  end
89
40
  end
@@ -1,17 +1,23 @@
1
1
  module AwDatapipe
2
2
  class PipelineObject < Struct
3
+ attr_reader :id
4
+ attr_accessor :name
3
5
  attr_accessor :pipeline
4
6
 
5
7
  def self.build(params)
6
- new.tap do |struct|
7
- params.each_pair { |k, v| struct.send "#{k}=", v }
8
- end
8
+ new(params)
9
+ end
10
+
11
+ def initialize(params)
12
+ @id = params.delete(:id) || params[:name].underscore.to_sym ||
13
+ raise(ArgumentError, ":id or :name required: #{params.inspect}")
14
+ params.each_pair { |k, v| send "#{k}=", v }
9
15
  end
10
16
 
11
17
  # Iterates through struct members, recursively collecting any PipelineObjects.
12
18
  # Recursion ensures dependencies are sorted before dependents.
13
19
  def dependencies
14
- (members - [:id]).each_with_object([]) do |attr_name, depends|
20
+ members.each_with_object([]) do |attr_name, depends|
15
21
  value = send(attr_name)
16
22
  value = pipeline.objects.fetch(value) if value.is_a?(Symbol)
17
23
  depends << value.dependencies << value if value.is_a?(PipelineObject)
@@ -22,14 +28,18 @@ module AwDatapipe
22
28
  self.class.name.split('::').last
23
29
  end
24
30
 
31
+ def inspect
32
+ "#<#{demodulized_class_name} #{to_hash}>"
33
+ end
34
+
25
35
  def to_hash
26
- Hash[each_pair.to_a]
36
+ Hash[each_pair.to_a].merge(id: id, name: name)
27
37
  end
28
38
 
29
39
  def source(indent_level = 1)
30
- "#{self.class.name}.build(\n" << indent(indent_level) << members.map do |member|
31
- member_source(member)
32
- end.join(",\n" << indent(indent_level)) << ")"
40
+ "#{self.class.name}.build(" << [:id, :name, *members].map do |member|
41
+ "\n" << indent(indent_level) << member_source(member)
42
+ end.join(",") << ")"
33
43
  end
34
44
 
35
45
  protected
@@ -46,18 +56,18 @@ module AwDatapipe
46
56
  end
47
57
  end
48
58
 
49
- Configuration = PipelineObject.new(:failure_and_rerun_mode, :id, :name, :pipeline_log_uri, :resource_role, :role, :schedule, :schedule_type)
50
- Schedule = PipelineObject.new(:id, :name, :period, :start_date_time)
59
+ Configuration = PipelineObject.new(:failure_and_rerun_mode, :pipeline_log_uri, :resource_role, :role, :schedule, :schedule_type)
60
+ Schedule = PipelineObject.new(:period, :start_date_time)
51
61
 
52
- Ec2Resource = PipelineObject.new(:action_on_task_failure, :id, :instance_type, :name, :security_group_ids, :subnet_id, :terminate_after)
53
- S3DataNode = PipelineObject.new(:directory_path, :id, :name)
62
+ Ec2Resource = PipelineObject.new(:action_on_task_failure, :instance_type, :security_group_ids, :subnet_id, :terminate_after)
63
+ S3DataNode = PipelineObject.new(:directory_path)
54
64
 
55
- JdbcDatabase = PipelineObject.new(:_password, :connection_string, :id, :jdbc_driver_class, :name, :username)
56
- SqlDataNode = PipelineObject.new(:database, :id, :name, :select_query, :table)
57
- CopyActivity = PipelineObject.new(:id, :input, :name, :output, :runs_on)
65
+ JdbcDatabase = PipelineObject.new(:_password, :connection_string, :jdbc_driver_class, :username)
66
+ SqlDataNode = PipelineObject.new(:database, :select_query, :table)
67
+ CopyActivity = PipelineObject.new(:input, :output, :runs_on)
58
68
 
59
- RedshiftDatabase = PipelineObject.new(:_password, :connection_string, :database_name, :id, :name, :username)
60
- RedshiftDataNode = PipelineObject.new(:create_table_sql, :database, :id, :name, :primary_keys, :schema_name, :table_name)
61
- RedshiftCopyActivity = PipelineObject.new(:id, :input, :insert_mode, :name, :output, :runs_on)
69
+ RedshiftDatabase = PipelineObject.new(:_password, :connection_string, :database_name, :username)
70
+ RedshiftDataNode = PipelineObject.new(:create_table_sql, :database, :primary_keys, :schema_name, :table_name)
71
+ RedshiftCopyActivity = PipelineObject.new(:input, :insert_mode, :output, :runs_on)
62
72
 
63
73
  end
@@ -12,9 +12,9 @@ module AwDatapipe
12
12
 
13
13
  def unmarshal(aws_definition)
14
14
  # pipeline.aws_definition = aws_definition # for troubleshooting
15
- objects = unmarshal_pipeline_objects(aws_definition)
16
- parameter_metadata = unmarshal_parameter_objects(aws_definition)
17
- parameter_values = unmarshal_parameter_values(aws_definition)
15
+ objects = unmarshal_pipeline_objects(aws_definition.pipeline_objects)
16
+ parameter_metadata = unmarshal_parameter_objects(aws_definition.parameter_objects)
17
+ parameter_values = unmarshal_parameter_values(aws_definition.parameter_values)
18
18
 
19
19
  Pipeline.new(objects, parameter_metadata, parameter_values)
20
20
  end
@@ -43,8 +43,8 @@ module AwDatapipe
43
43
  end
44
44
 
45
45
  # @return [Array<PipelineObject>] PipelineObject subclass instances.
46
- def unmarshal_pipeline_objects(aws_definition)
47
- aws_definition.pipeline_objects.map do |aws_struct|
46
+ def unmarshal_pipeline_objects(pipeline_objects)
47
+ pipeline_objects.map do |aws_struct|
48
48
  unmarshal_pipeline_object(aws_struct)
49
49
  end
50
50
  end
@@ -57,23 +57,33 @@ module AwDatapipe
57
57
 
58
58
  klass = AwDatapipe.const_defined?(type, false) ?
59
59
  AwDatapipe.const_get(type, false) :
60
- AwDatapipe.const_set(type, PipelineObject.new(*attributes.keys.sort))
60
+ AwDatapipe.const_set(type, PipelineObject.new(*(attributes.keys - [:id, :name])))
61
61
 
62
- klass.new(*attributes.sort.map(&:last)) # pass values sorted by keys
62
+ klass.new(attributes)
63
63
  end
64
64
 
65
65
  def fields_to_hash(fields)
66
66
  fields.each_with_object({}) do |field, hash|
67
- hash[symbolize field.key] = field.string_value || field.ref_value.underscore.to_sym
67
+ if hash[symbolize field.key]
68
+ hash[symbolize field.key] = Array(hash[symbolize field.key]) << field.string_value
69
+ else
70
+ hash[symbolize field.key] = field.string_value || field.ref_value.underscore.to_sym
71
+ end
68
72
  end
69
73
  end
70
74
 
71
75
  def hash_to_fields(hash)
72
- hash.keys.map do |key|
73
- PipelineObject === hash[key] ?
74
- { key: camelize(key, :lower), ref_value: camelize(hash[key].id) } :
75
- { key: camelize(key, :lower), string_value: hash[key] }
76
- end
76
+ hash.each_pair.map do |key, value|
77
+ camelized_key = camelize(key, :lower)
78
+ case value
79
+ when PipelineObject
80
+ { key: camelized_key, ref_value: camelize(value.id) }
81
+ when Array
82
+ value.map { |v| Hash[key: camelized_key, string_value: v] }
83
+ else
84
+ { key: camelized_key, string_value: value }
85
+ end
86
+ end.flatten
77
87
  end
78
88
 
79
89
  # Convert string to a rubyish variable name.
@@ -98,8 +108,8 @@ module AwDatapipe
98
108
  { id: key, attributes: out }
99
109
  end
100
110
 
101
- def unmarshal_parameter_objects(aws_definition)
102
- aws_definition.parameter_objects.each_with_object({}) do |object, hash|
111
+ def unmarshal_parameter_objects(parameter_objects)
112
+ parameter_objects.each_with_object({}) do |object, hash|
103
113
  klass = ParameterMetadata.new(*object.attributes.map(&:key).map(&:to_sym))
104
114
  hash[object.id] = object.attributes.each_with_object(klass.new) do |attribute, struct|
105
115
  struct.send "#{attribute.key}=", attribute.string_value
@@ -110,14 +120,22 @@ module AwDatapipe
110
120
  def marshal_parameter_values(parameter_values)
111
121
  out = []
112
122
  parameter_values.each_pair do |id, value|
113
- out << { id: id, string_value: value }
123
+ if value.respond_to?(:each)
124
+ value.each { |v| out << { id: id, string_value: v } }
125
+ else
126
+ out << { id: id, string_value: value }
127
+ end
114
128
  end
115
129
  out
116
130
  end
117
131
 
118
- def unmarshal_parameter_values(aws_definition)
119
- aws_definition.parameter_values.each_with_object({}) do |value, hash|
120
- hash[value.id] = value.string_value
132
+ def unmarshal_parameter_values(parameter_values)
133
+ parameter_values.each_with_object({}) do |value, hash|
134
+ if hash[value.id]
135
+ hash[value.id] = Array(hash[value.id]) << value.string_value
136
+ else
137
+ hash[value.id] = value.string_value
138
+ end
121
139
  end
122
140
  end
123
141
  end
@@ -0,0 +1,68 @@
1
+ module AwDatapipe
2
+ # Writes a Pipeline definition into a ruby script.
3
+ class SourceWriter
4
+ def self.call(pipeline, pathname)
5
+ new(pipeline).call(pathname)
6
+ end
7
+
8
+ attr_reader :pipeline
9
+ delegate :objects, :parameter_values, :parameter_metadata, :referenced_object_ids, to: :pipeline
10
+
11
+ def initialize(pipeline)
12
+ @pipeline = pipeline
13
+ end
14
+
15
+ def call(pathname)
16
+ File.write(pathname, source)
17
+ end
18
+
19
+ def source
20
+ [
21
+ header_source,
22
+ objects_source,
23
+ parameter_metadata_source,
24
+ parameter_values_source,
25
+ footer_source
26
+ ].join("\n")
27
+ end
28
+
29
+ protected
30
+
31
+ def header_source
32
+ <<-EOF
33
+ # Generated by aw_datapipe download_definition of #{pipeline.id}
34
+ require 'aw_datapipe'
35
+ EOF
36
+ end
37
+
38
+ def objects_source
39
+ object_ids = objects.keys
40
+ unreferenced_object_ids = object_ids - referenced_object_ids
41
+ s = referenced_object_ids.map { |id| "#{id} = #{objects[id].source}" }.join("\n\n")
42
+ s << "\n\nactivities = [\n"
43
+ s << unreferenced_object_ids.map { |id| " #{objects[id].source(2)}" }.join(",\n")
44
+ s << "\n]"
45
+ end
46
+
47
+ def parameter_metadata_source
48
+ "parameter_metadata = {\n " << parameter_metadata.sort.map do |key, value|
49
+ "\"#{key}\" => #{value.source}"
50
+ end.join(",\n ") << "\n}\n"
51
+ end
52
+
53
+ def parameter_values_source
54
+ "parameter_values = {\n " << parameter_values.sort.map do |key, value|
55
+ "\"#{key}\" => #{value.inspect}"
56
+ end.join(",\n ") << "\n}\n"
57
+ end
58
+
59
+ def footer_source
60
+ <<-EOF
61
+ pipelines = AwDatapipe::Session.new
62
+ pipeline = AwDatapipe::Pipeline.build(default, activities, parameter_metadata, parameter_values)
63
+ pipeline.id = "#{pipeline.id}"
64
+ pipelines.save(pipeline)
65
+ EOF
66
+ end
67
+ end
68
+ end
@@ -1,3 +1,3 @@
1
1
  module AwDatapipe
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aw_datapipe
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Piers Chambers
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: aws-sdk
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -89,6 +89,8 @@ extensions: []
89
89
  extra_rdoc_files: []
90
90
  files:
91
91
  - ".gitignore"
92
+ - ".travis.yml"
93
+ - CHANGELOG.md
92
94
  - Gemfile
93
95
  - LICENSE.txt
94
96
  - README.md
@@ -103,6 +105,7 @@ files:
103
105
  - lib/aw_datapipe/pipeline_object.rb
104
106
  - lib/aw_datapipe/pipeline_serializer.rb
105
107
  - lib/aw_datapipe/session.rb
108
+ - lib/aw_datapipe/source_writer.rb
106
109
  - lib/aw_datapipe/version.rb
107
110
  homepage: http://github.com/varyonic/aw_datapipe
108
111
  licenses: