waterworks 0.0.0 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/bin/waterworks +30 -0
  3. data/lib/waterworks/actions/sns_alarm.rb +22 -0
  4. data/lib/waterworks/actions/terminate.rb +18 -0
  5. data/lib/waterworks/activities/copy_activity.rb +38 -0
  6. data/lib/waterworks/activities/emr_activity.rb +43 -0
  7. data/lib/waterworks/activities/hadoop_activity.rb +44 -0
  8. data/lib/waterworks/activities/hive_activity.rb +49 -0
  9. data/lib/waterworks/activities/hive_copy_activity.rb +43 -0
  10. data/lib/waterworks/activities/pig_activity.rb +48 -0
  11. data/lib/waterworks/activities/redshift_copy_activity.rb +42 -0
  12. data/lib/waterworks/activities/shell_command_activity.rb +44 -0
  13. data/lib/waterworks/activities/sql_activity.rb +43 -0
  14. data/lib/waterworks/containers/pipeline_definition.rb +66 -0
  15. data/lib/waterworks/data_formats/csv.rb +20 -0
  16. data/lib/waterworks/data_formats/custom.rb +21 -0
  17. data/lib/waterworks/data_formats/dynamo_db_data_format.rb +19 -0
  18. data/lib/waterworks/data_formats/dynamo_db_export_data_format.rb +19 -0
  19. data/lib/waterworks/data_formats/reg_ex.rb +21 -0
  20. data/lib/waterworks/data_formats/tsv.rb +22 -0
  21. data/lib/waterworks/data_nodes/dynamo_db_data_node.rb +41 -0
  22. data/lib/waterworks/data_nodes/my_sql_data_node.rb +42 -0
  23. data/lib/waterworks/data_nodes/redshift_data_node.rb +41 -0
  24. data/lib/waterworks/data_nodes/s3_data_node.rb +42 -0
  25. data/lib/waterworks/data_nodes/sql_data_node.rb +42 -0
  26. data/lib/waterworks/databases/jdbc_database.rb +25 -0
  27. data/lib/waterworks/databases/rds_database.rb +25 -0
  28. data/lib/waterworks/databases/redshift_database.rb +25 -0
  29. data/lib/waterworks/other/default.rb +22 -0
  30. data/lib/waterworks/pipeline_object.rb +205 -0
  31. data/lib/waterworks/preconditions/dynamo_db_data_exists.rb +31 -0
  32. data/lib/waterworks/preconditions/dynamo_db_table_exists.rb +31 -0
  33. data/lib/waterworks/preconditions/exists.rb +29 -0
  34. data/lib/waterworks/preconditions/s3_key_exists.rb +31 -0
  35. data/lib/waterworks/preconditions/s3_prefix_not_empty.rb +31 -0
  36. data/lib/waterworks/preconditions/shell_command_precondition.rb +34 -0
  37. data/lib/waterworks/resources/ec2_resource.rb +57 -0
  38. data/lib/waterworks/resources/emr_cluster.rb +68 -0
  39. data/lib/waterworks/resources/http_proxy.rb +25 -0
  40. data/lib/waterworks/schedule/schedule.rb +23 -0
  41. data/lib/waterworks/util.rb +27 -0
  42. data/lib/waterworks/utilities/emr_configuration.rb +21 -0
  43. data/lib/waterworks/utilities/property.rb +20 -0
  44. data/lib/waterworks/utilities/shell_script_config.rb +20 -0
  45. data/lib/waterworks.rb +1 -0
  46. metadata +62 -5
@@ -0,0 +1,34 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object whose `type` attribute is set to
  # 'ShellCommandPrecondition' on construction.
  class ShellCommandPrecondition < PipelineObject
    # Table of field name => kind (:string or :ref) for this class.
    #
    # The table declared here is merged with the superclass table; when both
    # declare the same key, the superclass entry takes precedence.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        attemptStatus: :string,
        attemptTimeout: :string,
        failureAndRerunMode: :string,
        lateAfterTimeout: :string,
        maximumRetries: :string,
        onFail: :ref,
        onLateAction: :ref,
        onSuccess: :ref,
        parent: :ref,
        preconditionTimeout: :string,
        reportProgressTimeout: :string,
        retryDelay: :string,
        scriptArgument: :string,
        stderr: :string,
        stdout: :string,
        command: :string,
        scriptUri: :string,
        type: :string,
      }
      own_fields.merge(superclass.safe_fields)
    end

    # A reader/writer pair for every field known when the class is defined.
    attr_accessor(*safe_fields.keys)

    # Forwards id and name to the superclass initializer, then calls
    # set_attrs on its result to record the object type.
    def initialize(id = nil, name = nil)
      initialized = super(id, name)
      initialized.set_attrs(type: 'ShellCommandPrecondition')
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object whose `type` attribute is set to 'Ec2Resource' on
  # construction.
  class Ec2Resource < PipelineObject
    # Table of field name => kind (:string or :ref) for this class.
    #
    # The table declared here is merged with the superclass table; when both
    # declare the same key, the superclass entry takes precedence.
    #
    # @return [Hash{Symbol => Symbol}]
    # rubocop:disable Metrics/MethodLength
    def self.safe_fields
      own_fields = {
        actionOnResourceFailure: :string,
        actionOnTaskFailure: :string,
        associatePublicIpAddress: :string,
        attemptStatus: :string,
        attemptTimeout: :string,
        availabilityZone: :string,
        failureAndRerunMode: :string,
        httpProxy: :ref,
        imageId: :string,
        initTimeout: :string,
        instanceCount: :string,
        instanceType: :string,
        keyPair: :string,
        lateAfterTimeout: :string,
        maxActiveInstances: :string,
        maximumRetries: :string,
        minInstanceCount: :string,
        onFail: :ref,
        onLateAction: :ref,
        onSuccess: :ref,
        parent: :ref,
        pipelineLogUri: :string,
        region: :string,
        reportProgressTimeout: :string,
        retryDelay: :string,
        runAsUser: :string,
        runsOn: :ref,
        scheduleType: :string,
        securityGroupIds: :string,
        securityGroups: :string,
        spotBidPrice: :string,
        subnetId: :string,
        terminateAfter: :string,
        useOnDemandOnLastAttempt: :string,
        workerGroup: :string,
        resourceRole: :string,
        role: :string,
        schedule: :ref,
        type: :string,
      }
      own_fields.merge(superclass.safe_fields)
    end
    # rubocop:enable Metrics/MethodLength

    # A reader/writer pair for every field known when the class is defined.
    attr_accessor(*safe_fields.keys)

    # Forwards id and name to the superclass initializer, then calls
    # set_attrs on its result to record the object type.
    def initialize(id = nil, name = nil)
      initialized = super(id, name)
      initialized.set_attrs(type: 'Ec2Resource')
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object whose `type` attribute is set to 'EmrCluster' on
  # construction.
  class EmrCluster < PipelineObject
    # Table of field name => kind (:string or :ref) for this class.
    #
    # The table declared here is merged with the superclass table; when both
    # declare the same key, the superclass entry takes precedence.
    #
    # @return [Hash{Symbol => Symbol}]
    # rubocop:disable Metrics/MethodLength
    def self.safe_fields
      own_fields = {
        actionOnResourceFailure: :string,
        actionOnTaskFailure: :string,
        additionalMasterSecurityGroupIds: :string,
        additionalSlaveSecurityGroupIds: :string,
        amiVersion: :string,
        applications: :string,
        attemptStatus: :string,
        attemptTimeout: :string,
        availabilityZone: :string,
        bootstrapAction: :string,
        configuration: :ref,
        coreInstanceBidPrice: :string,
        coreInstanceCount: :string,
        coreInstanceType: :string,
        emrManagedMasterSecurityGroupId: :string,
        emrManagedSlaveSecurityGroupId: :string,
        enableDebugging: :string,
        failureAndRerunMode: :string,
        hadoopSchedulerType: :string,
        httpProxy: :ref,
        initTimeout: :string,
        keyPair: :string,
        lateAfterTimeout: :string,
        masterInstanceBidPrice: :string,
        masterInstanceType: :string,
        maxActiveInstances: :string,
        maximumRetries: :string,
        onFail: :ref,
        onLateAction: :ref,
        onSuccess: :ref,
        parent: :ref,
        pipelineLogUri: :string,
        region: :string,
        releaseLabel: :string,
        reportProgressTimeout: :string,
        resourceRole: :string,
        retryDelay: :string,
        role: :string,
        runsOn: :ref,
        scheduleType: :string,
        subnetId: :string,
        supportedProducts: :string,
        taskInstanceBidPrice: :string,
        taskInstanceCount: :string,
        taskInstanceType: :string,
        terminateAfter: :string,
        useOnDemandOnLastAttempt: :string,
        workerGroup: :string,
        schedule: :ref,
        type: :string,
      }
      own_fields.merge(superclass.safe_fields)
    end
    # rubocop:enable Metrics/MethodLength

    # A reader/writer pair for every field known when the class is defined.
    attr_accessor(*safe_fields.keys)

    # Forwards id and name to the superclass initializer, then calls
    # set_attrs on its result to record the object type.
    def initialize(id = nil, name = nil)
      initialized = super(id, name)
      initialized.set_attrs(type: 'EmrCluster')
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object whose `type` attribute is set to 'HttpProxy' on
  # construction.
  class HttpProxy < PipelineObject
    # Table of field name => kind (:string or :ref) for this class.
    #
    # The table declared here is merged with the superclass table; when both
    # declare the same key, the superclass entry takes precedence.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        parent: :ref,
        password: :string,
        s3NoProxy: :string,
        username: :string,
        windowsDomain: :string,
        windowsWorkgroup: :string,
        hostname: :string,
        port: :string,
        type: :string,
      }
      own_fields.merge(superclass.safe_fields)
    end

    # A reader/writer pair for every field known when the class is defined.
    attr_accessor(*safe_fields.keys)

    # Forwards id and name to the superclass initializer, then calls
    # set_attrs on its result to record the object type.
    def initialize(id = nil, name = nil)
      initialized = super(id, name)
      initialized.set_attrs(type: 'HttpProxy')
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object whose `type` attribute is set to 'Schedule' on
  # construction.
  class Schedule < PipelineObject
    # Table of field name => kind (:string or :ref) for this class.
    #
    # The table declared here is merged with the superclass table; when both
    # declare the same key, the superclass entry takes precedence.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        endDateTime: :string,
        occurrences: :string,
        parent: :ref,
        period: :string,
        startAt: :string,
        startDateTime: :string,
        type: :string,
      }
      own_fields.merge(superclass.safe_fields)
    end

    # A reader/writer pair for every field known when the class is defined.
    attr_accessor(*safe_fields.keys)

    # Forwards id and name to the superclass initializer, then calls
    # set_attrs on its result to record the object type.
    def initialize(id = nil, name = nil)
      initialized = super(id, name)
      initialized.set_attrs(type: 'Schedule')
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'set'
2
module Waterworks
  # Stateless helper functions. Every helper is callable on the class itself
  # (e.g. `Util.time_expr`); no instance is required.
  class Util
    # Field names that get a leading '*' from asterisk_if_needed.
    @asterisk_fields = Set.new(%i[password])

    # Installs a default proc on +hash+ so that a lookup with a missing key
    # falls back to the entry stored under the key's string form
    # (`hash[:a]` finds `hash['a']`). Yields nil when there is no such entry.
    #
    # The hash is modified in place (its default proc is replaced) and the
    # same object is returned.
    #
    # @param hash [Hash] hash to modify
    # @return [Hash] the same hash
    def self.indifferentify(hash)
      hash.default_proc = proc { |h, k| h.key?(k.to_s) ? h[k.to_s] : nil }
      hash
    end

    # Returns +field+ as a Symbol, prefixed with '*' when it is listed in
    # the asterisk set (currently only :password).
    #
    # @param field [String, Symbol] field name
    # @return [Symbol]
    def self.asterisk_if_needed(field)
      if @asterisk_fields.include? field.to_sym
        "*#{field}".to_sym
      else
        field.to_sym
      end
    end

    # Strips a single leading '*' from +key+, if present; otherwise returns
    # +key+ unchanged.
    #
    # @param key [String] key, possibly asterisk-prefixed
    # @return [String]
    def self.deasterisk(key)
      key = key[1..-1] if key.start_with? '*'
      key
    end

    # Builds an expression string of the form
    #   #{format(minusDays(@scheduledStartTime,OFFSET),'YYYY<d>MM<d>dd')}
    # The leading "#{" is emitted literally (it is escaped below) and is not
    # Ruby interpolation; only +offset+ and +delim+ are substituted.
    #
    # Previously this helper existed only as an instance method, so
    # `Util.time_expr` raised NoMethodError even though every other helper in
    # this class is class-level.
    #
    # @param offset [#to_s] number of days passed to minusDays
    # @param delim [#to_s] separator placed between the date components
    # @return [String]
    def self.time_expr(offset = '1', delim = '/')
      "\#{format(minusDays(@scheduledStartTime,#{offset}),'YYYY#{delim}MM#{delim}dd')}"
    end

    # Instance-level form kept for backward compatibility with callers that
    # do `Util.new.time_expr`; delegates to the class-level helper.
    #
    # @see Util.time_expr
    def time_expr(offset = '1', delim = '/')
      self.class.time_expr(offset, delim)
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object whose `type` attribute is set to 'EmrConfiguration' on
  # construction.
  class EmrConfiguration < PipelineObject
    # Table of field name => kind (:string or :ref) for this class.
    #
    # The table declared here is merged with the superclass table; when both
    # declare the same key, the superclass entry takes precedence.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        configuration: :ref,
        parent: :ref,
        property: :ref,
        classification: :string,
        type: :string,
      }
      own_fields.merge(superclass.safe_fields)
    end

    # A reader/writer pair for every field known when the class is defined.
    attr_accessor(*safe_fields.keys)

    # Forwards id and name to the superclass initializer, then calls
    # set_attrs on its result to record the object type.
    def initialize(id = nil, name = nil)
      initialized = super(id, name)
      initialized.set_attrs(type: 'EmrConfiguration')
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object whose `type` attribute is set to 'Property' on
  # construction.
  class Property < PipelineObject
    # Table of field name => kind (:string or :ref) for this class.
    #
    # The table declared here is merged with the superclass table; when both
    # declare the same key, the superclass entry takes precedence.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        parent: :ref,
        key: :string,
        value: :string,
        type: :string,
      }
      own_fields.merge(superclass.safe_fields)
    end

    # A reader/writer pair for every field known when the class is defined.
    attr_accessor(*safe_fields.keys)

    # Forwards id and name to the superclass initializer, then calls
    # set_attrs on its result to record the object type.
    def initialize(id = nil, name = nil)
      initialized = super(id, name)
      initialized.set_attrs(type: 'Property')
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object whose `type` attribute is set to 'ShellScriptConfig' on
  # construction.
  class ShellScriptConfig < PipelineObject
    # Table of field name => kind (:string or :ref) for this class.
    #
    # The table declared here is merged with the superclass table; when both
    # declare the same key, the superclass entry takes precedence.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        parent: :ref,
        scriptArgument: :string,
        scriptUri: :string,
        type: :string,
      }
      own_fields.merge(superclass.safe_fields)
    end

    # A reader/writer pair for every field known when the class is defined.
    attr_accessor(*safe_fields.keys)

    # Forwards id and name to the superclass initializer, then calls
    # set_attrs on its result to record the object type.
    def initialize(id = nil, name = nil)
      initialized = super(id, name)
      initialized.set_attrs(type: 'ShellScriptConfig')
    end
  end
end
data/lib/waterworks.rb ADDED
@@ -0,0 +1 @@
1
# Eagerly require every .rb file at or below this file's directory.
# The glob result is sorted so the load order is the same on every platform
# (Dir[] ordering is not guaranteed on older Rubies), and __dir__ yields an
# absolute base path regardless of how this file itself was loaded.
Dir[File.join(__dir__, '**', '*.rb')].sort.each { |file| require file }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: waterworks
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Logan Akamatsu
@@ -24,17 +24,74 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '3.4'
27
- description: It will be mine. Oh yes. It will be mine.
28
- email: logan.akamatsu@gmail.com
27
+ - !ruby/object:Gem::Dependency
28
+ name: thor
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.19'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.19'
41
+ description: Object representations for Amazon Data Pipelines
42
+ email:
29
43
  executables:
30
44
  - waterworks
31
45
  extensions: []
32
46
  extra_rdoc_files: []
33
47
  files:
34
48
  - bin/waterworks
49
+ - lib/waterworks.rb
50
+ - lib/waterworks/actions/sns_alarm.rb
51
+ - lib/waterworks/actions/terminate.rb
52
+ - lib/waterworks/activities/copy_activity.rb
53
+ - lib/waterworks/activities/emr_activity.rb
54
+ - lib/waterworks/activities/hadoop_activity.rb
55
+ - lib/waterworks/activities/hive_activity.rb
56
+ - lib/waterworks/activities/hive_copy_activity.rb
57
+ - lib/waterworks/activities/pig_activity.rb
58
+ - lib/waterworks/activities/redshift_copy_activity.rb
59
+ - lib/waterworks/activities/shell_command_activity.rb
60
+ - lib/waterworks/activities/sql_activity.rb
61
+ - lib/waterworks/containers/pipeline_definition.rb
62
+ - lib/waterworks/data_formats/csv.rb
63
+ - lib/waterworks/data_formats/custom.rb
64
+ - lib/waterworks/data_formats/dynamo_db_data_format.rb
65
+ - lib/waterworks/data_formats/dynamo_db_export_data_format.rb
66
+ - lib/waterworks/data_formats/reg_ex.rb
67
+ - lib/waterworks/data_formats/tsv.rb
68
+ - lib/waterworks/data_nodes/dynamo_db_data_node.rb
69
+ - lib/waterworks/data_nodes/my_sql_data_node.rb
70
+ - lib/waterworks/data_nodes/redshift_data_node.rb
71
+ - lib/waterworks/data_nodes/s3_data_node.rb
72
+ - lib/waterworks/data_nodes/sql_data_node.rb
73
+ - lib/waterworks/databases/jdbc_database.rb
74
+ - lib/waterworks/databases/rds_database.rb
75
+ - lib/waterworks/databases/redshift_database.rb
76
+ - lib/waterworks/other/default.rb
77
+ - lib/waterworks/pipeline_object.rb
78
+ - lib/waterworks/preconditions/dynamo_db_data_exists.rb
79
+ - lib/waterworks/preconditions/dynamo_db_table_exists.rb
80
+ - lib/waterworks/preconditions/exists.rb
81
+ - lib/waterworks/preconditions/s3_key_exists.rb
82
+ - lib/waterworks/preconditions/s3_prefix_not_empty.rb
83
+ - lib/waterworks/preconditions/shell_command_precondition.rb
84
+ - lib/waterworks/resources/ec2_resource.rb
85
+ - lib/waterworks/resources/emr_cluster.rb
86
+ - lib/waterworks/resources/http_proxy.rb
87
+ - lib/waterworks/schedule/schedule.rb
88
+ - lib/waterworks/util.rb
89
+ - lib/waterworks/utilities/emr_configuration.rb
90
+ - lib/waterworks/utilities/property.rb
91
+ - lib/waterworks/utilities/shell_script_config.rb
35
92
  homepage: http://rubygems.org/gems/waterworks
36
93
  licenses:
37
- - MIT
94
+ - Apache 2.0
38
95
  metadata: {}
39
96
  post_install_message:
40
97
  rdoc_options: []
@@ -55,5 +112,5 @@ rubyforge_project:
55
112
  rubygems_version: 2.2.2
56
113
  signing_key:
57
114
  specification_version: 4
58
- summary: Placeholder... gem to follow shortly
115
+ summary: Lightweight AWS Data Pipeline SDK
59
116
  test_files: []