pipely 0.8.3 → 0.10.0

Files changed (39)
  1. checksums.yaml +4 -4
  2. data/lib/pipely/build.rb +2 -16
  3. data/lib/pipely/build/daily_scheduler.rb +1 -1
  4. data/lib/pipely/build/definition.rb +30 -2
  5. data/lib/pipely/build/environment_config.rb +24 -1
  6. data/lib/pipely/build/s3_path_builder.rb +65 -33
  7. data/lib/pipely/deploy/bootstrap.rb +17 -14
  8. data/lib/pipely/deploy/bootstrap_context.rb +87 -10
  9. data/lib/pipely/deploy/bootstrap_registry.rb +45 -0
  10. data/lib/pipely/deploy/client.rb +33 -18
  11. data/lib/pipely/deploy/json_definition.rb +51 -0
  12. data/lib/pipely/pipeline_date_time/pipeline_date.rb +62 -0
  13. data/lib/pipely/pipeline_date_time/pipeline_date_pattern.rb +42 -0
  14. data/lib/pipely/pipeline_date_time/pipeline_date_range_base.rb +44 -0
  15. data/lib/pipely/pipeline_date_time/pipeline_day_range.rb +14 -0
  16. data/lib/pipely/pipeline_date_time/pipeline_month_range.rb +26 -0
  17. data/lib/pipely/pipeline_date_time/pipeline_year_range.rb +25 -0
  18. data/lib/pipely/tasks/definition.rb +7 -0
  19. data/lib/pipely/tasks/deploy.rb +7 -0
  20. data/lib/pipely/tasks/upload_pipeline_as_gem.rb +19 -9
  21. data/lib/pipely/version.rb +1 -1
  22. data/spec/fixtures/bootstrap_contexts/green.rb +9 -0
  23. data/spec/fixtures/bootstrap_contexts/simple.rb +9 -0
  24. data/spec/fixtures/templates/bootstrap.sh.erb +4 -0
  25. data/spec/lib/pipely/build/environment_config_spec.rb +58 -0
  26. data/spec/lib/pipely/build/s3_path_builder_spec.rb +34 -2
  27. data/spec/lib/pipely/build/template_spec.rb +10 -10
  28. data/spec/lib/pipely/build_spec.rb +29 -0
  29. data/spec/lib/pipely/deploy/bootstrap_context_spec.rb +102 -14
  30. data/spec/lib/pipely/deploy/bootstrap_registry_spec.rb +32 -0
  31. data/spec/lib/pipely/deploy/bootstrap_spec.rb +41 -24
  32. data/spec/lib/pipely/pipeline_date_time/pipeline_date_pattern_spec.rb +181 -0
  33. data/spec/lib/pipely/pipeline_date_time/pipeline_date_range_base_spec.rb +39 -0
  34. data/spec/lib/pipely/pipeline_date_time/pipeline_date_spec.rb +110 -0
  35. data/spec/lib/pipely/pipeline_date_time/pipeline_day_range_spec.rb +23 -0
  36. data/spec/lib/pipely/pipeline_date_time/pipeline_month_range_spec.rb +93 -0
  37. data/spec/lib/pipely/pipeline_date_time/pipeline_year_range_spec.rb +93 -0
  38. data/spec/lib/pipely/tasks/upload_pipeline_as_gem_spec.rb +59 -0
  39. metadata +49 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 14e0eda2e643cd0b9b8fcf79b668adb5162510a8
-  data.tar.gz: ea9199edd89bcbc3d8b6ab71a727e2a8c69e2b56
+  metadata.gz: 6ed7918ddfbf5b2cd544a8e4a6513d0922811456
+  data.tar.gz: dde98acf09526facedfa74750fb5ad9bdfbac81a
 SHA512:
-  metadata.gz: f7f3a6df462c844a132d070af1474c3b102c40ad4592531f8e9078f5523abf0b265386b8c090a12c2dd39f18d8060d9f2d1f47b448a5eb8080dadc97fc3c9b2d
-  data.tar.gz: d578ec42ba4eb48e6937824b2a67c6af16a7b3c700655a3d956e9584f7cce3eb1ea33b1fb8109f8a26c2db36abaa544375a67b0fbad6515d3621e43a7e508078
+  metadata.gz: 5b4423e586a2079f7ee3c65c4463fb577e109f8dc66e84be105ce3f45237f2e5afab85144399a83a3768f401bf16b72a01a5a3e399e63b181d4fca2ded74e2ba
+  data.tar.gz: 1fda022cab04e00666bf7ed4776f6b7bc35e0e6945f38d276c8cfc693dc654a7c27d738954b016534c69cfa15d265bd0c3b88c50c82c7c3ba055adc2440a8712
data/lib/pipely/build.rb CHANGED
@@ -4,6 +4,7 @@ require 'pipely/build/daily_scheduler'
 require 'pipely/build/right_now_scheduler'
 require 'pipely/build/s3_path_builder'
 require 'pipely/build/environment_config'
+require 'pathology'
 
 module Pipely
 
@@ -15,22 +16,7 @@ module Pipely
     env = environment.to_sym
     config = EnvironmentConfig.load(config_path, env)
 
-    case environment.to_sym
-    when :production
-      s3_prefix = "production/#{config[:namespace]}"
-      if config[:start_time]
-        # allow config to change pipeline start time
-        # TODO: all scheduling should be done through config before pipely 1.0
-        scheduler = DailyScheduler.new(config[:start_time])
-      else
-        scheduler = DailyScheduler.new
-      end
-    when :staging
-      s3_prefix = "staging/#{`whoami`.strip}/#{config[:namespace]}"
-      scheduler = RightNowScheduler.new
-    end
-
-    Definition.new(template, env, s3_prefix, scheduler, config)
+    Definition.new(template, env, config)
   end
 
 end
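
With this change, build_definition no longer hard-codes the scheduler and S3 prefix per environment; both now come from the loaded EnvironmentConfig (see the Definition and EnvironmentConfig diffs below). A minimal sketch of the new call path, with an illustrative config path and environment name (these are not taken from the gem's docs):

    require 'pipely/build'

    # 'template' is assumed to be a previously built Pipely::Build::Template;
    # 'config/pipeline.yml' is a hypothetical YAML file whose 'production'
    # section supplies name, namespace, scheduler, start_time, s3_prefix and s3.
    definition = Pipely::Build.build_definition(template, 'production', 'config/pipeline.yml')

    definition.pipeline_name   # => value of config[:name]
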
data/lib/pipely/build/daily_scheduler.rb CHANGED
@@ -6,7 +6,7 @@ module Pipely
     #
     class DailyScheduler
 
-      def initialize(start_time="11:00:00")
+      def initialize(start_time)
        @start_time = start_time
      end
 
data/lib/pipely/build/definition.rb CHANGED
@@ -3,7 +3,7 @@ module Pipely
 
     # Represent a pipeline definition, built from a Template and some config.
     #
-    class Definition < Struct.new(:template,:env,:s3_prefix,:scheduler,:config)
+    class Definition < Struct.new(:template, :env, :config)
       def pipeline_name
         config[:name]
       end
@@ -12,6 +12,15 @@ module Pipely
         config[:namespace]
       end
 
+      def s3_prefix
+        if config[:s3_prefix]
+          template = Pathology.template(config[:s3_prefix])
+          template.interpolate(interpolation_context)
+        else
+          fail('unspecified s3_prefix')
+        end
+      end
+
       def s3_path_builder
         S3PathBuilder.new(config[:s3].merge(prefix: s3_prefix))
       end
@@ -24,7 +33,26 @@
 
         template.to_json
       end
-    end
 
+      def scheduler
+        case config[:scheduler]
+        when 'daily'
+          DailyScheduler.new(config[:start_time])
+        when 'now'
+          RightNowScheduler.new
+        else
+          fail('unspecified scheduler')
+        end
+      end
+
+      private
+
+      def interpolation_context
+        config.merge({
+          :whoami => `whoami`.strip,
+        })
+      end
+
+    end
   end
 end
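
The reworked Definition derives its S3 prefix by interpolating a Pathology template taken from config[:s3_prefix] (with :whoami added to the interpolation context) and picks its scheduler from config[:scheduler]. A hedged sketch with an illustrative config Hash (all values made up; 'template' is assumed to be an existing Pipely::Build::Template):

    require 'pipely/build/definition'

    config = {
      name:       'my-pipeline',
      namespace:  'reports',
      s3_prefix:  'staging/:whoami/:namespace',
      scheduler:  'now',
      start_time: '11:00:00',
      s3: { assets: 'assets-bucket', logs: 'logs-bucket', steps: 'steps-bucket' },
    }

    definition = Pipely::Build::Definition.new(template, :staging, config)

    definition.s3_prefix   # => "staging/<your whoami>/reports", via Pathology
    definition.scheduler   # => a RightNowScheduler
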
data/lib/pipely/build/environment_config.rb CHANGED
@@ -8,9 +8,32 @@ module Pipely
     #
     class EnvironmentConfig < Hash
 
+      # Continue supporting env-based defaults until pipely v1.0
+      ENV_DEFAULTS = {
+        production: {
+          s3_prefix: 'production/:namespace',
+          scheduler: 'daily',
+          start_time: '11:00:00',
+        },
+        staging: {
+          s3_prefix: 'staging/:whoami/:namespace',
+          scheduler: 'now',
+
+          # Since scheduler can now be overridden via commandline argument,
+          # supply a start_time even for environments that default to 'now'.
+          start_time: '11:00:00',
+        }
+      }
+
       def self.load(filename, environment)
         raw = YAML.load_file(filename)[environment.to_s]
-        load_from_hash(raw)
+        config = load_from_hash(raw)
+
+        if defaults = ENV_DEFAULTS[environment.to_sym]
+          defaults.merge(config)
+        else
+          config
+        end
       end
 
       def self.load_from_hash(attributes)
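
ENV_DEFAULTS are merged underneath the values read from the YAML file (defaults.merge(config)), so anything set explicitly for the environment wins while missing keys fall back to the old hard-coded behaviour. A sketch, assuming a hypothetical pipeline.yml:

    require 'pipely/build/environment_config'

    # pipeline.yml (hypothetical):
    #
    #   staging:
    #     name: my-pipeline
    #     namespace: reports
    #     scheduler: daily      # explicitly overrides the staging default of 'now'
    #
    config = Pipely::Build::EnvironmentConfig.load('pipeline.yml', :staging)

    config[:scheduler]   # => "daily"                       (file value wins)
    config[:s3_prefix]   # => "staging/:whoami/:namespace"  (filled from ENV_DEFAULTS)
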
data/lib/pipely/build/s3_path_builder.rb CHANGED
@@ -1,3 +1,5 @@
+require 'pathology'
+
 module Pipely
   module Build
 
@@ -5,56 +7,86 @@ module Pipely
     #
     class S3PathBuilder
 
-      attr_reader :assets_bucket, :logs_bucket, :steps_bucket
-
       START_TIME = "\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}"
       START_DATE = "\#{format(@scheduledStartTime,'YYYY-MM-dd')}"
 
+      # options[:templates] should contain a Hash of your desired S3 path
+      # patterns, formatted for Pathology. The remainder of the options Hash
+      # serves as interpolation values for the templates.
+      #
+      # Several additional interpolation variables (:protocol, :timestamp,
+      # :datestamp) are provided by S3PathBuilder at interpolation time.
+      #
+      # If options[:templates] is not present, or if it is missing any of the
+      # legacy templates (assets, logs, steps, etc.), they will be
+      # automatically built, using bucket names found in the options Hash,
+      # preserving the original behavior.
+      #
       def initialize(options)
-        @assets_bucket = options[:assets]
-        @logs_bucket = options[:logs]
-        @steps_bucket = options[:steps]
-        @s3prefix = options[:prefix]
-      end
+        @options = options.merge({
+          timestamp: START_TIME,
+          datestamp: START_DATE,
+        })
 
-      def s3_log_prefix
-        "s3://#{@logs_bucket}/#{@s3prefix}/#{START_TIME}"
-      end
-
-      def s3_step_prefix
-        "s3://#{@steps_bucket}/#{@s3prefix}"
-      end
+        @path_templates = default_templates
 
-      def s3n_step_prefix
-        "s3n://#{@steps_bucket}/#{@s3prefix}"
+        if templates = @options.delete(:templates)
+          @path_templates.merge!(templates)
+        end
       end
 
-      def s3_asset_prefix
-        "s3://#{@assets_bucket}/#{bucket_relative_s3_asset_prefix}"
-      end
+      # Support legacy interface, wherein config simply contained bucket names,
+      # and users were forced to abide by Pipely's somewhat arbitrary path
+      # structure.
+      #
+      def default_templates
+        assets, logs, steps = @options.values_at(:assets, :logs, :steps)
 
-      def s3n_asset_prefix
-        "s3n://#{@assets_bucket}/#{@s3prefix}/#{START_TIME}"
+        {
+          asset: ":protocol://#{assets}/:prefix/:timestamp",
+          log: ":protocol://#{logs}/:prefix/:timestamp",
+          step: ":protocol://#{steps}/:prefix",
+          shared_asset: ":protocol://#{assets}/:prefix/shared/:datestamp",
+          bucket_relative_asset: ':prefix/:timestamp',
+        }
       end
 
-      def s3_shared_asset_prefix
-        "s3://#{@assets_bucket}/#{@s3prefix}/shared/#{START_DATE}"
+      # Implement path interpolation methods, e.g. s3_log_prefix, etc.
+      #
+      def method_missing(method_name, *args, &block)
+        case method_name
+        when /^(s3n?)_(.*)_prefix$/
+          if pattern = @path_templates[$2.to_sym]
+            Pathology.template(pattern).interpolate(
+              @options.merge({protocol: $1})
+            )
+          else
+            super
+          end
+        else
+          super
+        end
       end
 
+      # Re-route legacy method name to the standard format implemented by
+      # method_missing above.
+      #
       def bucket_relative_s3_asset_prefix
-        "#{@s3prefix}/#{START_TIME}"
+        s3_bucket_relative_asset_prefix
       end
 
       def to_hash
-        {
-          :s3_log_prefix => s3_log_prefix,
-          :s3_step_prefix => s3_step_prefix,
-          :s3n_step_prefix => s3n_step_prefix,
-          :s3_asset_prefix => s3_asset_prefix,
-          :s3n_asset_prefix => s3n_asset_prefix,
-          :s3_shared_asset_prefix => s3_shared_asset_prefix,
-          :bucket_relative_s3_asset_prefix => bucket_relative_s3_asset_prefix,
-        }
+        values = %w(s3 s3n).flat_map do |protocol|
+          @path_templates.keys.map do |path_name|
+            key = "#{protocol}_#{path_name}_prefix".to_sym
+            [key, send(key)]
+          end
+        end
+
+        # Support legacy method name.
+        Hash[values].merge({
+          bucket_relative_s3_asset_prefix: bucket_relative_s3_asset_prefix
+        })
       end
 
     end
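
Any s3_<name>_prefix or s3n_<name>_prefix call is now resolved through method_missing against the matching Pathology template, so the legacy helpers keep working and custom templates get the same treatment. A sketch with made-up bucket names:

    require 'pipely/build/s3_path_builder'

    builder = Pipely::Build::S3PathBuilder.new(
      assets: 'assets-bucket',
      logs:   'logs-bucket',
      steps:  'steps-bucket',
      prefix: 'production/reports',
      # Optional extra template; :protocol, :timestamp and :datestamp are
      # supplied by S3PathBuilder itself at interpolation time.
      templates: { report: ':protocol://reports-bucket/:prefix/:datestamp' }
    )

    builder.s3_step_prefix    # => "s3://steps-bucket/production/reports"
    builder.s3n_report_prefix # => "s3n://reports-bucket/production/reports/" followed by
                              #    the literal Data Pipeline datestamp expression
    builder.to_hash           # s3 and s3n variants of every template, plus
                              # bucket_relative_s3_asset_prefix for legacy callers
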
data/lib/pipely/deploy/bootstrap.rb CHANGED
@@ -1,6 +1,8 @@
 require 'pipely/bundler'
 require 'pipely/deploy/bootstrap_context'
+require 'pipely/deploy/bootstrap_registry'
 require 'pipely/deploy/s3_uploader'
+require 'active_support/core_ext/string/conversions'
 
 module Pipely
   module Deploy
@@ -8,23 +10,24 @@ module Pipely
     # Helps bootstrap a pipeline
     class Bootstrap
 
-      attr_reader :project_spec
-      attr_reader :gem_files
+      attr_reader :gem_files, :s3_steps_path
 
-      def initialize(s3_uploader)
-        @s3_uploader = s3_uploader
+      def initialize(gem_files, s3_steps_path)
+        @gem_files = gem_files
+        @s3_steps_path = s3_steps_path
       end
 
-      # Builds the project's gem from gemspec, uploads the gem to s3, and
-      # uploads all the gem dependencies to S3
-      def build_and_upload_gems
-        @gem_files = Pipely::Bundler.gem_files
-        @s3_uploader.upload(@gem_files.values)
-      end
-
-      def context(s3_steps_path)
-        BootstrapContext.new.tap do |context|
-          context.gem_files = @s3_uploader.s3_urls(gem_files.values)
+      def context(*mixins)
+        bootstrap_mixins = BootstrapRegistry.instance.register_mixins(mixins)
+
+        BootstrapContext.class_eval do
+          bootstrap_mixins.each do |mixin|
+            puts "Adding bootstrap mixin #{mixin}"
+            include mixin.constantize
+          end
+          self
+        end.new.tap do |context|
+          context.gem_files = gem_files
          context.s3_steps_path = s3_steps_path
        end
      end
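
Bootstrap now receives the already-uploaded gem file URLs and the steps path directly, and context() can pull registered mixins into BootstrapContext by name. A sketch; the gem URL and mixin name are made up:

    require 'pipely/deploy/bootstrap'

    gem_files = ['s3://my-bucket/gems/my-pipeline-0.1.0.gem']   # hypothetical URLs
    bootstrap = Pipely::Deploy::Bootstrap.new(gem_files, 's3://my-bucket/steps')

    # 'MyApp::BootstrapHelpers' is a made-up module; it must live at
    # 'my_app/bootstrap_helpers.rb' on the load path, since the registry
    # requires it via String#underscore before it is included.
    context = bootstrap.context('MyApp::BootstrapHelpers')

    context.emr.install_gems_script   # copies gems with 'hadoop fs -copyToLocal'
    context.ec2.install_gems_script   # copies gems with 'aws s3 cp'
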
data/lib/pipely/deploy/bootstrap_context.rb CHANGED
@@ -2,27 +2,104 @@
 module Pipely
   module Deploy
 
-    # Context passed to the erb templates
+    # Context passed to the erb templates, provides helpers for
+    # common bootstrapping activities for emr and ec2 instances.
+    #
+    #   bootstrap.ec2.install_gems_script
+    #   bootstrap.emr.install_gems_script
+    #
     class BootstrapContext
-      attr_accessor :gem_files
-      attr_accessor :s3_steps_path
+      attr_accessor :gem_files, :s3_steps_path
+      attr_reader :ec2, :emr
 
-      def install_gems_script(transport = :hadoop_fs, &blk)
-        script = ""
+      # Context for EMR instances
+      class EmrContext
+        def initialize(parent)
+          @parent = parent
+        end
+
+        def install_gems_script(&blk)
+          @parent.install_gems_script(:hadoop_fs, &blk)
+        end
+      end
+
+      # Context for EC2 instances
+      class Ec2Context
+        def initialize(parent)
+          @parent = parent
+          @ssh_initialized = false
+        end
+
+        def install_gems_script(&blk)
+          @parent.install_gems_script(:awscli, &blk)
+        end
+
+        def as_root(init_ssh=true)
+          script = ""
+
+          if init_ssh && !@ssh_initialized
+            @ssh_initialized = true
+            script << %{
+              # Set up ssh access
+              if [ ! -f ~/.ssh/id_rsa ]; then
+                mkdir -p ~/.ssh
+                ssh-keygen -P '' -f ~/.ssh/id_rsa
+                cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
+                chmod 600 ~/.ssh/authorized_keys
+              fi
+            }
+          end
+
+          script << %{
+            # Use ssh to bypass the sudo "require tty" setting
+            ssh -o "StrictHostKeyChecking no" -t -t ec2-user@localhost <<- EOF
+            sudo su -;
+          }
+
+          # The yield to be run as root
+          script << yield
 
+          script << %{
+            # exit twice, once for su and once for ssh
+            exit;
+            exit;
+          EOF
+          }
+        end
+      end
+
+      def initialize
+        @emr = EmrContext.new(self)
+        @ec2 = Ec2Context.new(self)
+      end
+
+      def fetch_command(transport)
         case transport.to_sym
         when :hadoop_fs
-          transport_cmd = 'hadoop fs -copyToLocal'
+          'hadoop fs -copyToLocal'
         when :awscli
-          transport_cmd = 'aws s3 cp'
-        else
+          'aws s3 cp'
+        end
+      end
+
+      def install_gems_script(transport, &blk)
+
+        transport_cmd = fetch_command(transport)
+
+        if transport_cmd.nil?
           raise "Unsupported transport: #{transport}" unless blk
         end
 
+        script = ""
        @gem_files.each do |gem_file|
          filename = File.basename(gem_file)
-          command = "#{transport_cmd} #{gem_file} #{filename}" if transport_cmd
-          command = yield(gem_file, filename, command) if blk
+          params = [transport_cmd, gem_file, filename]
+          if blk
+            command = yield(*params)
+          else
+            command = params.join(" ")
+          end
+
          script << %Q[
 # #{filename}
 #{command}
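
The new ec2/emr sub-contexts are meant to be reached from an ERB bootstrap template (the added spec fixture templates/bootstrap.sh.erb points that way). A hedged sketch of direct usage from Ruby, with a made-up gem URL and package name:

    require 'pipely/deploy/bootstrap_context'

    context = Pipely::Deploy::BootstrapContext.new
    context.gem_files = ['s3://my-bucket/gems/my-pipeline-0.1.0.gem']  # hypothetical

    puts context.emr.install_gems_script   # per-gem 'hadoop fs -copyToLocal' commands
    puts context.ec2.install_gems_script   # per-gem 'aws s3 cp' commands

    # Run a snippet as root on an EC2 instance: the first call also emits the
    # one-time ssh-to-localhost setup used to dodge sudo's requiretty setting.
    puts context.ec2.as_root { "yum install -y libxml2-devel\n" }
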
data/lib/pipely/deploy/bootstrap_registry.rb ADDED
@@ -0,0 +1,45 @@
+require 'singleton'
+require 'active_support/core_ext/string/conversions'
+
+module Pipely
+  module Deploy
+
+    #
+    ## Registry of Mixins to be applied to the bootstrap context
+    #
+    class BootstrapRegistry
+      include Singleton
+
+      def initialize
+        @mixins = []
+      end
+
+      class << self
+        def register_mixins(*mixins)
+          instance.register_mixins(*mixins)
+        end
+
+        def mixins
+          instance.mixins
+        end
+      end
+
+      def register_mixins(*mixins)
+        new_mixins = [mixins].flatten.compact
+
+        new_mixins.each do |mixin|
+          begin
+            require mixin.underscore
+          rescue LoadError => e
+            raise "Failed to require #{mixin} for bootstrap_contexts: #{e}"
+          end
+        end
+        @mixins = (@mixins + new_mixins).uniq
+      end
+
+      def mixins
+        @mixins
+      end
+    end
+  end
+end
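
The registry is a Singleton that requires each mixin's file (derived from the class name via String#underscore) and keeps a de-duplicated list for later inclusion into BootstrapContext. A sketch with a made-up module name:

    require 'pipely/deploy/bootstrap_registry'

    # 'MyApp::GreenBootstrap' is hypothetical; register_mixins will attempt
    # `require 'my_app/green_bootstrap'` and raise a descriptive error if the
    # file cannot be loaded.
    Pipely::Deploy::BootstrapRegistry.register_mixins('MyApp::GreenBootstrap')

    Pipely::Deploy::BootstrapRegistry.mixins
    # => ["MyApp::GreenBootstrap"]
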