waterworks 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/bin/waterworks +30 -0
  3. data/lib/waterworks/actions/sns_alarm.rb +22 -0
  4. data/lib/waterworks/actions/terminate.rb +18 -0
  5. data/lib/waterworks/activities/copy_activity.rb +38 -0
  6. data/lib/waterworks/activities/emr_activity.rb +43 -0
  7. data/lib/waterworks/activities/hadoop_activity.rb +44 -0
  8. data/lib/waterworks/activities/hive_activity.rb +49 -0
  9. data/lib/waterworks/activities/hive_copy_activity.rb +43 -0
  10. data/lib/waterworks/activities/pig_activity.rb +48 -0
  11. data/lib/waterworks/activities/redshift_copy_activity.rb +42 -0
  12. data/lib/waterworks/activities/shell_command_activity.rb +44 -0
  13. data/lib/waterworks/activities/sql_activity.rb +43 -0
  14. data/lib/waterworks/containers/pipeline_definition.rb +66 -0
  15. data/lib/waterworks/data_formats/csv.rb +20 -0
  16. data/lib/waterworks/data_formats/custom.rb +21 -0
  17. data/lib/waterworks/data_formats/dynamo_db_data_format.rb +19 -0
  18. data/lib/waterworks/data_formats/dynamo_db_export_data_format.rb +19 -0
  19. data/lib/waterworks/data_formats/reg_ex.rb +21 -0
  20. data/lib/waterworks/data_formats/tsv.rb +22 -0
  21. data/lib/waterworks/data_nodes/dynamo_db_data_node.rb +41 -0
  22. data/lib/waterworks/data_nodes/my_sql_data_node.rb +42 -0
  23. data/lib/waterworks/data_nodes/redshift_data_node.rb +41 -0
  24. data/lib/waterworks/data_nodes/s3_data_node.rb +42 -0
  25. data/lib/waterworks/data_nodes/sql_data_node.rb +42 -0
  26. data/lib/waterworks/databases/jdbc_database.rb +25 -0
  27. data/lib/waterworks/databases/rds_database.rb +25 -0
  28. data/lib/waterworks/databases/redshift_database.rb +25 -0
  29. data/lib/waterworks/other/default.rb +22 -0
  30. data/lib/waterworks/pipeline_object.rb +205 -0
  31. data/lib/waterworks/preconditions/dynamo_db_data_exists.rb +31 -0
  32. data/lib/waterworks/preconditions/dynamo_db_table_exists.rb +31 -0
  33. data/lib/waterworks/preconditions/exists.rb +29 -0
  34. data/lib/waterworks/preconditions/s3_key_exists.rb +31 -0
  35. data/lib/waterworks/preconditions/s3_prefix_not_empty.rb +31 -0
  36. data/lib/waterworks/preconditions/shell_command_precondition.rb +34 -0
  37. data/lib/waterworks/resources/ec2_resource.rb +57 -0
  38. data/lib/waterworks/resources/emr_cluster.rb +68 -0
  39. data/lib/waterworks/resources/http_proxy.rb +25 -0
  40. data/lib/waterworks/schedule/schedule.rb +23 -0
  41. data/lib/waterworks/util.rb +27 -0
  42. data/lib/waterworks/utilities/emr_configuration.rb +21 -0
  43. data/lib/waterworks/utilities/property.rb +20 -0
  44. data/lib/waterworks/utilities/shell_script_config.rb +20 -0
  45. data/lib/waterworks.rb +1 -0
  46. metadata +62 -5
@@ -0,0 +1,34 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object that tags itself with the type 'ShellCommandPrecondition'.
  class ShellCommandPrecondition < PipelineObject
    # Table of field name => kind (:string or :ref) for this object type.
    # The superclass table is merged in last, so its entries win on any
    # duplicate key.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        attemptStatus: :string,
        attemptTimeout: :string,
        failureAndRerunMode: :string,
        lateAfterTimeout: :string,
        maximumRetries: :string,
        onFail: :ref,
        onLateAction: :ref,
        onSuccess: :ref,
        parent: :ref,
        preconditionTimeout: :string,
        reportProgressTimeout: :string,
        retryDelay: :string,
        scriptArgument: :string,
        stderr: :string,
        stdout: :string,
        command: :string,
        scriptUri: :string,
        type: :string
      }
      own_fields.merge(superclass.safe_fields)
    end

    # One reader/writer pair per declared field.
    attr_accessor(*safe_fields.keys)

    # Runs the superclass initializer, then calls set_attrs on the value it
    # returns to record this object's type.
    #
    # @param id [Object, nil] forwarded to the superclass initializer
    # @param name [Object, nil] forwarded to the superclass initializer
    def initialize(id = nil, name = nil)
      base = super(id, name)
      base.set_attrs(type: 'ShellCommandPrecondition')
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object that tags itself with the type 'Ec2Resource'.
  class Ec2Resource < PipelineObject
    # Table of field name => kind (:string or :ref) for this object type.
    # The superclass table is merged in last, so its entries win on any
    # duplicate key.
    #
    # @return [Hash{Symbol => Symbol}]
    # rubocop:disable Metrics/MethodLength
    def self.safe_fields
      own_fields = {
        actionOnResourceFailure: :string,
        actionOnTaskFailure: :string,
        associatePublicIpAddress: :string,
        attemptStatus: :string,
        attemptTimeout: :string,
        availabilityZone: :string,
        failureAndRerunMode: :string,
        httpProxy: :ref,
        imageId: :string,
        initTimeout: :string,
        instanceCount: :string,
        instanceType: :string,
        keyPair: :string,
        lateAfterTimeout: :string,
        maxActiveInstances: :string,
        maximumRetries: :string,
        minInstanceCount: :string,
        onFail: :ref,
        onLateAction: :ref,
        onSuccess: :ref,
        parent: :ref,
        pipelineLogUri: :string,
        region: :string,
        reportProgressTimeout: :string,
        retryDelay: :string,
        runAsUser: :string,
        runsOn: :ref,
        scheduleType: :string,
        securityGroupIds: :string,
        securityGroups: :string,
        spotBidPrice: :string,
        subnetId: :string,
        terminateAfter: :string,
        useOnDemandOnLastAttempt: :string,
        workerGroup: :string,
        resourceRole: :string,
        role: :string,
        schedule: :ref,
        type: :string
      }
      own_fields.merge(superclass.safe_fields)
    end
    # rubocop:enable Metrics/MethodLength

    # One reader/writer pair per declared field.
    attr_accessor(*safe_fields.keys)

    # Runs the superclass initializer, then calls set_attrs on the value it
    # returns to record this object's type.
    #
    # @param id [Object, nil] forwarded to the superclass initializer
    # @param name [Object, nil] forwarded to the superclass initializer
    def initialize(id = nil, name = nil)
      base = super(id, name)
      base.set_attrs(type: 'Ec2Resource')
    end
  end
end
@@ -0,0 +1,68 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object that tags itself with the type 'EmrCluster'.
  class EmrCluster < PipelineObject
    # Table of field name => kind (:string or :ref) for this object type.
    # The superclass table is merged in last, so its entries win on any
    # duplicate key.
    #
    # @return [Hash{Symbol => Symbol}]
    # rubocop:disable Metrics/MethodLength
    def self.safe_fields
      own_fields = {
        actionOnResourceFailure: :string,
        actionOnTaskFailure: :string,
        additionalMasterSecurityGroupIds: :string,
        additionalSlaveSecurityGroupIds: :string,
        amiVersion: :string,
        applications: :string,
        attemptStatus: :string,
        attemptTimeout: :string,
        availabilityZone: :string,
        bootstrapAction: :string,
        configuration: :ref,
        coreInstanceBidPrice: :string,
        coreInstanceCount: :string,
        coreInstanceType: :string,
        emrManagedMasterSecurityGroupId: :string,
        emrManagedSlaveSecurityGroupId: :string,
        enableDebugging: :string,
        failureAndRerunMode: :string,
        hadoopSchedulerType: :string,
        httpProxy: :ref,
        initTimeout: :string,
        keyPair: :string,
        lateAfterTimeout: :string,
        masterInstanceBidPrice: :string,
        masterInstanceType: :string,
        maxActiveInstances: :string,
        maximumRetries: :string,
        onFail: :ref,
        onLateAction: :ref,
        onSuccess: :ref,
        parent: :ref,
        pipelineLogUri: :string,
        region: :string,
        releaseLabel: :string,
        reportProgressTimeout: :string,
        resourceRole: :string,
        retryDelay: :string,
        role: :string,
        runsOn: :ref,
        scheduleType: :string,
        subnetId: :string,
        supportedProducts: :string,
        taskInstanceBidPrice: :string,
        taskInstanceCount: :string,
        taskInstanceType: :string,
        terminateAfter: :string,
        useOnDemandOnLastAttempt: :string,
        workerGroup: :string,
        schedule: :ref,
        type: :string
      }
      own_fields.merge(superclass.safe_fields)
    end
    # rubocop:enable Metrics/MethodLength

    # One reader/writer pair per declared field.
    attr_accessor(*safe_fields.keys)

    # Runs the superclass initializer, then calls set_attrs on the value it
    # returns to record this object's type.
    #
    # @param id [Object, nil] forwarded to the superclass initializer
    # @param name [Object, nil] forwarded to the superclass initializer
    def initialize(id = nil, name = nil)
      base = super(id, name)
      base.set_attrs(type: 'EmrCluster')
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object that tags itself with the type 'HttpProxy'.
  class HttpProxy < PipelineObject
    # Table of field name => kind (:string or :ref) for this object type.
    # The superclass table is merged in last, so its entries win on any
    # duplicate key.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        parent: :ref,
        password: :string,
        s3NoProxy: :string,
        username: :string,
        windowsDomain: :string,
        windowsWorkgroup: :string,
        hostname: :string,
        port: :string,
        type: :string
      }
      own_fields.merge(superclass.safe_fields)
    end

    # One reader/writer pair per declared field.
    attr_accessor(*safe_fields.keys)

    # Runs the superclass initializer, then calls set_attrs on the value it
    # returns to record this object's type.
    #
    # @param id [Object, nil] forwarded to the superclass initializer
    # @param name [Object, nil] forwarded to the superclass initializer
    def initialize(id = nil, name = nil)
      base = super(id, name)
      base.set_attrs(type: 'HttpProxy')
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object that tags itself with the type 'Schedule'.
  class Schedule < PipelineObject
    # Table of field name => kind (:string or :ref) for this object type.
    # The superclass table is merged in last, so its entries win on any
    # duplicate key.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        endDateTime: :string,
        occurrences: :string,
        parent: :ref,
        period: :string,
        startAt: :string,
        startDateTime: :string,
        type: :string
      }
      own_fields.merge(superclass.safe_fields)
    end

    # One reader/writer pair per declared field.
    attr_accessor(*safe_fields.keys)

    # Runs the superclass initializer, then calls set_attrs on the value it
    # returns to record this object's type.
    #
    # @param id [Object, nil] forwarded to the superclass initializer
    # @param name [Object, nil] forwarded to the superclass initializer
    def initialize(id = nil, name = nil)
      base = super(id, name)
      base.set_attrs(type: 'Schedule')
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'set'
2
module Waterworks
  # Stateless helpers for key handling and expression building.
  class Util
    # Field names that asterisk_if_needed prefixes with '*'.
    # NOTE(review): presumably these are the fields AWS Data Pipeline treats
    # as secure ('*password') — confirm against the pipeline definition docs.
    @asterisk_fields = Set.new(%i(password))

    # Installs a default proc on +hash+ so that a lookup with a missing key
    # falls back to the key's string form (e.g. h[:a] finds h['a']).
    # Mutates and returns the same hash.
    #
    # @param hash [Hash] hash to modify in place
    # @return [Hash] the same hash object
    def self.indifferentify(hash)
      # key? does not trigger the default proc, so a missing string key
      # simply yields nil instead of recursing.
      hash.default_proc = proc { |h, k| h[k.to_s] if h.key?(k.to_s) }
      hash
    end

    # Returns +field+ as a Symbol, prefixed with '*' when it is listed in
    # @asterisk_fields.
    #
    # @param field [String, Symbol]
    # @return [Symbol]
    def self.asterisk_if_needed(field)
      name = field.to_sym
      @asterisk_fields.include?(name) ? "*#{field}".to_sym : name
    end

    # Strips a single leading '*' from +key+, if present.
    #
    # @param key [String] anything responding to start_with? and []
    # @return [String] +key+ without its leading '*', or +key+ unchanged
    def self.deasterisk(key)
      key.start_with?('*') ? key[1..-1] : key
    end

    # Builds the expression string
    #   #{format(minusDays(@scheduledStartTime,OFFSET),'YYYY<d>MM<d>dd')}
    # where the leading '#{' is emitted literally (it is escaped below) and
    # only +offset+ and +delim+ are interpolated by Ruby.
    #
    # Exposed at class level so it can be called like the other helpers
    # (Util.time_expr); previously it was reachable only via an instance.
    #
    # @param offset [#to_s] value placed in the minusDays(...) call
    # @param delim [#to_s] separator placed between YYYY, MM and dd
    # @return [String]
    def self.time_expr(offset = '1', delim = '/')
      "\#{format(minusDays(@scheduledStartTime,#{offset}),'YYYY#{delim}MM#{delim}dd')}"
    end

    # Instance-level form kept for backward compatibility; delegates to
    # Util.time_expr.
    #
    # @param offset [#to_s] see Util.time_expr
    # @param delim [#to_s] see Util.time_expr
    # @return [String]
    def time_expr(offset = '1', delim = '/')
      self.class.time_expr(offset, delim)
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object that tags itself with the type 'EmrConfiguration'.
  class EmrConfiguration < PipelineObject
    # Table of field name => kind (:string or :ref) for this object type.
    # The superclass table is merged in last, so its entries win on any
    # duplicate key.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        configuration: :ref,
        parent: :ref,
        property: :ref,
        classification: :string,
        type: :string
      }
      own_fields.merge(superclass.safe_fields)
    end

    # One reader/writer pair per declared field.
    attr_accessor(*safe_fields.keys)

    # Runs the superclass initializer, then calls set_attrs on the value it
    # returns to record this object's type.
    #
    # @param id [Object, nil] forwarded to the superclass initializer
    # @param name [Object, nil] forwarded to the superclass initializer
    def initialize(id = nil, name = nil)
      base = super(id, name)
      base.set_attrs(type: 'EmrConfiguration')
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object that tags itself with the type 'Property'.
  class Property < PipelineObject
    # Table of field name => kind (:string or :ref) for this object type.
    # The superclass table is merged in last, so its entries win on any
    # duplicate key.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        parent: :ref,
        key: :string,
        value: :string,
        type: :string
      }
      own_fields.merge(superclass.safe_fields)
    end

    # One reader/writer pair per declared field.
    attr_accessor(*safe_fields.keys)

    # Runs the superclass initializer, then calls set_attrs on the value it
    # returns to record this object's type.
    #
    # @param id [Object, nil] forwarded to the superclass initializer
    # @param name [Object, nil] forwarded to the superclass initializer
    def initialize(id = nil, name = nil)
      base = super(id, name)
      base.set_attrs(type: 'Property')
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require_relative '../pipeline_object.rb'
2
+
3
module Waterworks
  # Pipeline object that tags itself with the type 'ShellScriptConfig'.
  class ShellScriptConfig < PipelineObject
    # Table of field name => kind (:string or :ref) for this object type.
    # The superclass table is merged in last, so its entries win on any
    # duplicate key.
    #
    # @return [Hash{Symbol => Symbol}]
    def self.safe_fields
      own_fields = {
        parent: :ref,
        scriptArgument: :string,
        scriptUri: :string,
        type: :string
      }
      own_fields.merge(superclass.safe_fields)
    end

    # One reader/writer pair per declared field.
    attr_accessor(*safe_fields.keys)

    # Runs the superclass initializer, then calls set_attrs on the value it
    # returns to record this object's type.
    #
    # @param id [Object, nil] forwarded to the superclass initializer
    # @param name [Object, nil] forwarded to the superclass initializer
    def initialize(id = nil, name = nil)
      base = super(id, name)
      base.set_attrs(type: 'ShellScriptConfig')
    end
  end
end
data/lib/waterworks.rb ADDED
@@ -0,0 +1 @@
1
# Require every .rb file under this file's directory (recursively).
# The glob result is sorted so the load order is the same on every platform
# and Ruby version; unsorted glob order depends on the filesystem on older
# Rubies. __dir__ gives an absolute path, so each file is required by one
# canonical name.
Dir[File.join(__dir__, '**', '*.rb')].sort.each { |file| require file }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: waterworks
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Logan Akamatsu
@@ -24,17 +24,74 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '3.4'
27
- description: It will be mine. Oh yes. It will be mine.
28
- email: logan.akamatsu@gmail.com
27
+ - !ruby/object:Gem::Dependency
28
+ name: thor
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.19'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.19'
41
+ description: Object representations for Amazon Data Pipelines
42
+ email:
29
43
  executables:
30
44
  - waterworks
31
45
  extensions: []
32
46
  extra_rdoc_files: []
33
47
  files:
34
48
  - bin/waterworks
49
+ - lib/waterworks.rb
50
+ - lib/waterworks/actions/sns_alarm.rb
51
+ - lib/waterworks/actions/terminate.rb
52
+ - lib/waterworks/activities/copy_activity.rb
53
+ - lib/waterworks/activities/emr_activity.rb
54
+ - lib/waterworks/activities/hadoop_activity.rb
55
+ - lib/waterworks/activities/hive_activity.rb
56
+ - lib/waterworks/activities/hive_copy_activity.rb
57
+ - lib/waterworks/activities/pig_activity.rb
58
+ - lib/waterworks/activities/redshift_copy_activity.rb
59
+ - lib/waterworks/activities/shell_command_activity.rb
60
+ - lib/waterworks/activities/sql_activity.rb
61
+ - lib/waterworks/containers/pipeline_definition.rb
62
+ - lib/waterworks/data_formats/csv.rb
63
+ - lib/waterworks/data_formats/custom.rb
64
+ - lib/waterworks/data_formats/dynamo_db_data_format.rb
65
+ - lib/waterworks/data_formats/dynamo_db_export_data_format.rb
66
+ - lib/waterworks/data_formats/reg_ex.rb
67
+ - lib/waterworks/data_formats/tsv.rb
68
+ - lib/waterworks/data_nodes/dynamo_db_data_node.rb
69
+ - lib/waterworks/data_nodes/my_sql_data_node.rb
70
+ - lib/waterworks/data_nodes/redshift_data_node.rb
71
+ - lib/waterworks/data_nodes/s3_data_node.rb
72
+ - lib/waterworks/data_nodes/sql_data_node.rb
73
+ - lib/waterworks/databases/jdbc_database.rb
74
+ - lib/waterworks/databases/rds_database.rb
75
+ - lib/waterworks/databases/redshift_database.rb
76
+ - lib/waterworks/other/default.rb
77
+ - lib/waterworks/pipeline_object.rb
78
+ - lib/waterworks/preconditions/dynamo_db_data_exists.rb
79
+ - lib/waterworks/preconditions/dynamo_db_table_exists.rb
80
+ - lib/waterworks/preconditions/exists.rb
81
+ - lib/waterworks/preconditions/s3_key_exists.rb
82
+ - lib/waterworks/preconditions/s3_prefix_not_empty.rb
83
+ - lib/waterworks/preconditions/shell_command_precondition.rb
84
+ - lib/waterworks/resources/ec2_resource.rb
85
+ - lib/waterworks/resources/emr_cluster.rb
86
+ - lib/waterworks/resources/http_proxy.rb
87
+ - lib/waterworks/schedule/schedule.rb
88
+ - lib/waterworks/util.rb
89
+ - lib/waterworks/utilities/emr_configuration.rb
90
+ - lib/waterworks/utilities/property.rb
91
+ - lib/waterworks/utilities/shell_script_config.rb
35
92
  homepage: http://rubygems.org/gems/waterworks
36
93
  licenses:
37
- - MIT
94
+ - Apache 2.0
38
95
  metadata: {}
39
96
  post_install_message:
40
97
  rdoc_options: []
@@ -55,5 +112,5 @@ rubyforge_project:
55
112
  rubygems_version: 2.2.2
56
113
  signing_key:
57
114
  specification_version: 4
58
- summary: Placeholder... gem to follow shortly
115
+ summary: Lightweight AWS Data Pipeline SDK
59
116
  test_files: []