pipely 0.8.3 → 0.10.0

This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registries, and is provided for informational purposes only.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/lib/pipely/build.rb +2 -16
  3. data/lib/pipely/build/daily_scheduler.rb +1 -1
  4. data/lib/pipely/build/definition.rb +30 -2
  5. data/lib/pipely/build/environment_config.rb +24 -1
  6. data/lib/pipely/build/s3_path_builder.rb +65 -33
  7. data/lib/pipely/deploy/bootstrap.rb +17 -14
  8. data/lib/pipely/deploy/bootstrap_context.rb +87 -10
  9. data/lib/pipely/deploy/bootstrap_registry.rb +45 -0
  10. data/lib/pipely/deploy/client.rb +33 -18
  11. data/lib/pipely/deploy/json_definition.rb +51 -0
  12. data/lib/pipely/pipeline_date_time/pipeline_date.rb +62 -0
  13. data/lib/pipely/pipeline_date_time/pipeline_date_pattern.rb +42 -0
  14. data/lib/pipely/pipeline_date_time/pipeline_date_range_base.rb +44 -0
  15. data/lib/pipely/pipeline_date_time/pipeline_day_range.rb +14 -0
  16. data/lib/pipely/pipeline_date_time/pipeline_month_range.rb +26 -0
  17. data/lib/pipely/pipeline_date_time/pipeline_year_range.rb +25 -0
  18. data/lib/pipely/tasks/definition.rb +7 -0
  19. data/lib/pipely/tasks/deploy.rb +7 -0
  20. data/lib/pipely/tasks/upload_pipeline_as_gem.rb +19 -9
  21. data/lib/pipely/version.rb +1 -1
  22. data/spec/fixtures/bootstrap_contexts/green.rb +9 -0
  23. data/spec/fixtures/bootstrap_contexts/simple.rb +9 -0
  24. data/spec/fixtures/templates/bootstrap.sh.erb +4 -0
  25. data/spec/lib/pipely/build/environment_config_spec.rb +58 -0
  26. data/spec/lib/pipely/build/s3_path_builder_spec.rb +34 -2
  27. data/spec/lib/pipely/build/template_spec.rb +10 -10
  28. data/spec/lib/pipely/build_spec.rb +29 -0
  29. data/spec/lib/pipely/deploy/bootstrap_context_spec.rb +102 -14
  30. data/spec/lib/pipely/deploy/bootstrap_registry_spec.rb +32 -0
  31. data/spec/lib/pipely/deploy/bootstrap_spec.rb +41 -24
  32. data/spec/lib/pipely/pipeline_date_time/pipeline_date_pattern_spec.rb +181 -0
  33. data/spec/lib/pipely/pipeline_date_time/pipeline_date_range_base_spec.rb +39 -0
  34. data/spec/lib/pipely/pipeline_date_time/pipeline_date_spec.rb +110 -0
  35. data/spec/lib/pipely/pipeline_date_time/pipeline_day_range_spec.rb +23 -0
  36. data/spec/lib/pipely/pipeline_date_time/pipeline_month_range_spec.rb +93 -0
  37. data/spec/lib/pipely/pipeline_date_time/pipeline_year_range_spec.rb +93 -0
  38. data/spec/lib/pipely/tasks/upload_pipeline_as_gem_spec.rb +59 -0
  39. metadata +49 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 14e0eda2e643cd0b9b8fcf79b668adb5162510a8
-  data.tar.gz: ea9199edd89bcbc3d8b6ab71a727e2a8c69e2b56
+  metadata.gz: 6ed7918ddfbf5b2cd544a8e4a6513d0922811456
+  data.tar.gz: dde98acf09526facedfa74750fb5ad9bdfbac81a
 SHA512:
-  metadata.gz: f7f3a6df462c844a132d070af1474c3b102c40ad4592531f8e9078f5523abf0b265386b8c090a12c2dd39f18d8060d9f2d1f47b448a5eb8080dadc97fc3c9b2d
-  data.tar.gz: d578ec42ba4eb48e6937824b2a67c6af16a7b3c700655a3d956e9584f7cce3eb1ea33b1fb8109f8a26c2db36abaa544375a67b0fbad6515d3621e43a7e508078
+  metadata.gz: 5b4423e586a2079f7ee3c65c4463fb577e109f8dc66e84be105ce3f45237f2e5afab85144399a83a3768f401bf16b72a01a5a3e399e63b181d4fca2ded74e2ba
+  data.tar.gz: 1fda022cab04e00666bf7ed4776f6b7bc35e0e6945f38d276c8cfc693dc654a7c27d738954b016534c69cfa15d265bd0c3b88c50c82c7c3ba055adc2440a8712
data/lib/pipely/build.rb CHANGED
@@ -4,6 +4,7 @@ require 'pipely/build/daily_scheduler'
 require 'pipely/build/right_now_scheduler'
 require 'pipely/build/s3_path_builder'
 require 'pipely/build/environment_config'
+require 'pathology'
 
 module Pipely
 
@@ -15,22 +16,7 @@ module Pipely
     env = environment.to_sym
     config = EnvironmentConfig.load(config_path, env)
 
-    case environment.to_sym
-    when :production
-      s3_prefix = "production/#{config[:namespace]}"
-      if config[:start_time]
-        # allow config to change pipeline start time
-        # TODO: all scheduling should be done through config before pipely 1.0
-        scheduler = DailyScheduler.new(config[:start_time])
-      else
-        scheduler = DailyScheduler.new
-      end
-    when :staging
-      s3_prefix = "staging/#{`whoami`.strip}/#{config[:namespace]}"
-      scheduler = RightNowScheduler.new
-    end
-
-    Definition.new(template, env, s3_prefix, scheduler, config)
+    Definition.new(template, env, config)
   end
 
 end
data/lib/pipely/build/daily_scheduler.rb CHANGED
@@ -6,7 +6,7 @@ module Pipely
     #
     class DailyScheduler
 
-      def initialize(start_time="11:00:00")
+      def initialize(start_time)
        @start_time = start_time
      end
 
data/lib/pipely/build/definition.rb CHANGED
@@ -3,7 +3,7 @@ module Pipely
 
    # Represent a pipeline definition, built from a Template and some config.
    #
-    class Definition < Struct.new(:template,:env,:s3_prefix,:scheduler,:config)
+    class Definition < Struct.new(:template, :env, :config)
      def pipeline_name
        config[:name]
      end
@@ -12,6 +12,15 @@ module Pipely
        config[:namespace]
      end
 
+      def s3_prefix
+        if config[:s3_prefix]
+          template = Pathology.template(config[:s3_prefix])
+          template.interpolate(interpolation_context)
+        else
+          fail('unspecified s3_prefix')
+        end
+      end
+
      def s3_path_builder
        S3PathBuilder.new(config[:s3].merge(prefix: s3_prefix))
      end
@@ -24,7 +33,26 @@ module Pipely
 
        template.to_json
      end
-    end
 
+      def scheduler
+        case config[:scheduler]
+        when 'daily'
+          DailyScheduler.new(config[:start_time])
+        when 'now'
+          RightNowScheduler.new
+        else
+          fail('unspecified scheduler')
+        end
+      end
+
+      private
+
+      def interpolation_context
+        config.merge({
+          :whoami => `whoami`.strip,
+        })
+      end
+
+    end
  end
 end
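For illustration, a minimal sketch (not part of the packaged source) of how the reworked Definition derives its scheduler and s3_prefix purely from config. The config hash below is invented; the ':namespace' placeholder follows the Pathology pattern style used by ENV_DEFAULTS in environment_config.rb further down, and the template slot is left nil since it is not needed for these two calls.

  require 'pipely/build'

  config = {
    name:       'MyPipeline',
    namespace:  'my_pipeline',
    s3_prefix:  'production/:namespace',   # interpolated via Pathology
    scheduler:  'daily',
    start_time: '11:00:00',
    s3: { assets: 'assets-bucket', logs: 'logs-bucket', steps: 'steps-bucket' },
  }

  definition = Pipely::Build::Definition.new(nil, :production, config)
  definition.scheduler  # => DailyScheduler built with start_time "11:00:00"
  definition.s3_prefix  # => "production/my_pipeline"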
data/lib/pipely/build/environment_config.rb CHANGED
@@ -8,9 +8,32 @@ module Pipely
    #
    class EnvironmentConfig < Hash
 
+      # Continue supporting env-based defaults until pipely v1.0
+      ENV_DEFAULTS = {
+        production: {
+          s3_prefix: 'production/:namespace',
+          scheduler: 'daily',
+          start_time: '11:00:00',
+        },
+        staging: {
+          s3_prefix: 'staging/:whoami/:namespace',
+          scheduler: 'now',
+
+          # Since scheduler can now be overridden via commandline argument,
+          # supply a start_time even for environments that default to 'now'.
+          start_time: '11:00:00',
+        }
+      }
+
      def self.load(filename, environment)
        raw = YAML.load_file(filename)[environment.to_s]
-        load_from_hash(raw)
+        config = load_from_hash(raw)
+
+        if defaults = ENV_DEFAULTS[environment.to_sym]
+          defaults.merge(config)
+        else
+          config
+        end
      end
 
      def self.load_from_hash(attributes)
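A sketch of how these defaults combine with a config file; the YAML path and its contents are hypothetical. Because the merge is defaults.merge(config), explicit values from the file win and anything left unspecified falls back to the environment's ENV_DEFAULTS.

  require 'pipely/build/environment_config'

  # hypothetical config/pipeline.yml:
  #   staging:
  #     name: MyPipeline
  #     namespace: my_pipeline
  #     start_time: "02:00:00"
  config = Pipely::Build::EnvironmentConfig.load('config/pipeline.yml', :staging)

  config[:scheduler]   # => "now"                        (ENV_DEFAULTS[:staging])
  config[:s3_prefix]   # => "staging/:whoami/:namespace" (default pattern)
  config[:start_time]  # => "02:00:00"                   (explicit value wins)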
data/lib/pipely/build/s3_path_builder.rb CHANGED
@@ -1,3 +1,5 @@
+require 'pathology'
+
 module Pipely
  module Build
 
@@ -5,56 +7,86 @@ module Pipely
    #
    class S3PathBuilder
 
-      attr_reader :assets_bucket, :logs_bucket, :steps_bucket
-
      START_TIME = "\#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}"
      START_DATE = "\#{format(@scheduledStartTime,'YYYY-MM-dd')}"
 
+      # options[:templates] should contain a Hash of your desired S3 path
+      # patterns, formatted for Pathology. The remainder of the options Hash
+      # serves as interpolation values for the templates.
+      #
+      # Several additional interpolation variables (:protocol, :timestamp,
+      # :datestamp) are provided by S3PathBuilder at interpolation time.
+      #
+      # If options[:templates] is not present, or if it is missing any of the
+      # legacy templates (assets, logs, steps, etc.), they will be
+      # automatically built, using bucket names found in the options Hash,
+      # preserving the original behavior.
+      #
      def initialize(options)
-        @assets_bucket = options[:assets]
-        @logs_bucket = options[:logs]
-        @steps_bucket = options[:steps]
-        @s3prefix = options[:prefix]
-      end
+        @options = options.merge({
+          timestamp: START_TIME,
+          datestamp: START_DATE,
+        })
 
-      def s3_log_prefix
-        "s3://#{@logs_bucket}/#{@s3prefix}/#{START_TIME}"
-      end
-
-      def s3_step_prefix
-        "s3://#{@steps_bucket}/#{@s3prefix}"
-      end
+        @path_templates = default_templates
 
-      def s3n_step_prefix
-        "s3n://#{@steps_bucket}/#{@s3prefix}"
+        if templates = @options.delete(:templates)
+          @path_templates.merge!(templates)
+        end
      end
 
-      def s3_asset_prefix
-        "s3://#{@assets_bucket}/#{bucket_relative_s3_asset_prefix}"
-      end
+      # Support legacy interface, wherein config simply contained bucket names,
+      # and users were forced to abide by Pipely's somewhat arbitrary path
+      # structure.
+      #
+      def default_templates
+        assets, logs, steps = @options.values_at(:assets, :logs, :steps)
 
-      def s3n_asset_prefix
-        "s3n://#{@assets_bucket}/#{@s3prefix}/#{START_TIME}"
+        {
+          asset: ":protocol://#{assets}/:prefix/:timestamp",
+          log: ":protocol://#{logs}/:prefix/:timestamp",
+          step: ":protocol://#{steps}/:prefix",
+          shared_asset: ":protocol://#{assets}/:prefix/shared/:datestamp",
+          bucket_relative_asset: ':prefix/:timestamp',
+        }
      end
 
-      def s3_shared_asset_prefix
-        "s3://#{@assets_bucket}/#{@s3prefix}/shared/#{START_DATE}"
+      # Implement path interpolation methods, e.g. s3_log_prefix, etc.
+      #
+      def method_missing(method_name, *args, &block)
+        case method_name
+        when /^(s3n?)_(.*)_prefix$/
+          if pattern = @path_templates[$2.to_sym]
+            Pathology.template(pattern).interpolate(
+              @options.merge({protocol: $1})
+            )
+          else
+            super
+          end
+        else
+          super
+        end
      end
 
+      # Re-route legacy method name to the standard format implemented by
+      # method_missing above.
+      #
      def bucket_relative_s3_asset_prefix
-        "#{@s3prefix}/#{START_TIME}"
+        s3_bucket_relative_asset_prefix
      end
 
      def to_hash
-        {
-          :s3_log_prefix => s3_log_prefix,
-          :s3_step_prefix => s3_step_prefix,
-          :s3n_step_prefix => s3n_step_prefix,
-          :s3_asset_prefix => s3_asset_prefix,
-          :s3n_asset_prefix => s3n_asset_prefix,
-          :s3_shared_asset_prefix => s3_shared_asset_prefix,
-          :bucket_relative_s3_asset_prefix => bucket_relative_s3_asset_prefix,
-        }
+        values = %w(s3 s3n).flat_map do |protocol|
+          @path_templates.keys.map do |path_name|
+            key = "#{protocol}_#{path_name}_prefix".to_sym
+            [key, send(key)]
+          end
+        end
+
+        # Support legacy method name.
+        Hash[values].merge({
+          bucket_relative_s3_asset_prefix: bucket_relative_s3_asset_prefix
+        })
      end
 
    end
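A sketch of the template-driven path builder; the bucket names and the extra :dump template are invented. :protocol, :timestamp, and :datestamp are injected by S3PathBuilder itself, and method_missing resolves any s3_*_prefix or s3n_*_prefix call against the template of the same name.

  require 'pipely/build/s3_path_builder'

  builder = Pipely::Build::S3PathBuilder.new(
    assets: 'assets-bucket',
    logs:   'logs-bucket',
    steps:  'steps-bucket',
    prefix: 'production/my_pipeline',
    templates: {
      # merged over default_templates; existing entries may also be overridden
      dump: ':protocol://dump-bucket/:prefix/:datestamp',
    }
  )

  builder.s3_step_prefix   # => "s3://steps-bucket/production/my_pipeline"
  builder.s3n_dump_prefix  # custom template, resolved with protocol "s3n"
  builder.to_hash          # every template under both the s3 and s3n protocols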
data/lib/pipely/deploy/bootstrap.rb CHANGED
@@ -1,6 +1,8 @@
 require 'pipely/bundler'
 require 'pipely/deploy/bootstrap_context'
+require 'pipely/deploy/bootstrap_registry'
 require 'pipely/deploy/s3_uploader'
+require 'active_support/core_ext/string/conversions'
 
 module Pipely
  module Deploy
@@ -8,23 +10,24 @@ module Pipely
    # Helps bootstrap a pipeline
    class Bootstrap
 
-      attr_reader :project_spec
-      attr_reader :gem_files
+      attr_reader :gem_files, :s3_steps_path
 
-      def initialize(s3_uploader)
-        @s3_uploader = s3_uploader
+      def initialize(gem_files, s3_steps_path)
+        @gem_files = gem_files
+        @s3_steps_path = s3_steps_path
      end
 
-      # Builds the project's gem from gemspec, uploads the gem to S3, and
-      # uploads all the gem dependencies to S3
-      def build_and_upload_gems
-        @gem_files = Pipely::Bundler.gem_files
-        @s3_uploader.upload(@gem_files.values)
-      end
-
-      def context(s3_steps_path)
-        BootstrapContext.new.tap do |context|
-          context.gem_files = @s3_uploader.s3_urls(gem_files.values)
+      def context(*mixins)
+        bootstrap_mixins = BootstrapRegistry.instance.register_mixins(mixins)
+
+        BootstrapContext.class_eval do
+          bootstrap_mixins.each do |mixin|
+            puts "Adding bootstrap mixin #{mixin}"
+            include mixin.constantize
+          end
+          self
+        end.new.tap do |context|
+          context.gem_files = gem_files
          context.s3_steps_path = s3_steps_path
        end
      end
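For orientation, a sketch (with made-up S3 paths) of the reworked Bootstrap API: gem file URLs and the steps path are now passed in directly rather than built and uploaded by Bootstrap itself, and #context can mix registered modules into BootstrapContext.

  require 'pipely/deploy/bootstrap'

  gem_files = ['s3://my-bucket/steps/gems/my_pipeline-0.1.0.gem']
  bootstrap = Pipely::Deploy::Bootstrap.new(gem_files, 's3://my-bucket/steps')

  context = bootstrap.context
  context.gem_files      # => ["s3://my-bucket/steps/gems/my_pipeline-0.1.0.gem"]
  context.s3_steps_path  # => "s3://my-bucket/steps"

  # Passing mixin names, e.g. bootstrap.context('MyOrg::BootstrapHelpers'),
  # requires 'my_org/bootstrap_helpers' via the registry and includes that
  # module into BootstrapContext before the context object is built.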
data/lib/pipely/deploy/bootstrap_context.rb CHANGED
@@ -2,27 +2,104 @@
 module Pipely
  module Deploy
 
-    # Context passed to the erb templates
+    # Context passed to the erb templates, provides helpers for
+    # common bootstrapping activities for emr and ec2 instances.
+    #
+    #   bootstrap.ec2.install_gems_script
+    #   bootstrap.emr.install_gems_script
+    #
    class BootstrapContext
-      attr_accessor :gem_files
-      attr_accessor :s3_steps_path
+      attr_accessor :gem_files, :s3_steps_path
+      attr_reader :ec2, :emr
 
-      def install_gems_script(transport = :hadoop_fs, &blk)
-        script = ""
+      # Context for EMR instances
+      class EmrContext
+        def initialize(parent)
+          @parent = parent
+        end
+
+        def install_gems_script(&blk)
+          @parent.install_gems_script(:hadoop_fs, &blk)
+        end
+      end
+
+      # Context for EC2 instances
+      class Ec2Context
+        def initialize(parent)
+          @parent = parent
+          @ssh_initialized = false
+        end
+
+        def install_gems_script(&blk)
+          @parent.install_gems_script(:awscli, &blk)
+        end
+
+        def as_root(init_ssh=true)
+          script = ""
+
+          if init_ssh && !@ssh_initialized
+            @ssh_initialized = true
+            script << %{
+              # Set up ssh access
+              if [ ! -f ~/.ssh/id_rsa ]; then
+                mkdir -p ~/.ssh
+                ssh-keygen -P '' -f ~/.ssh/id_rsa
+                cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
+                chmod 600 ~/.ssh/authorized_keys
+              fi
+            }
+          end
+
+          script << %{
+            # Use ssh to bypass the sudo "require tty" setting
+            ssh -o "StrictHostKeyChecking no" -t -t ec2-user@localhost <<- EOF
+              sudo su -;
+          }
+
+          # The yield to be run as root
+          script << yield
 
+          script << %{
+            # exit twice, once for su and once for ssh
+            exit;
+            exit;
+            EOF
+          }
+        end
+      end
+
+      def initialize
+        @emr = EmrContext.new(self)
+        @ec2 = Ec2Context.new(self)
+      end
+
+      def fetch_command(transport)
        case transport.to_sym
        when :hadoop_fs
-          transport_cmd = 'hadoop fs -copyToLocal'
+          'hadoop fs -copyToLocal'
        when :awscli
-          transport_cmd = 'aws s3 cp'
-        else
+          'aws s3 cp'
+        end
+      end
+
+      def install_gems_script(transport, &blk)
+
+        transport_cmd = fetch_command(transport)
+
+        if transport_cmd.nil?
          raise "Unsupported transport: #{transport}" unless blk
        end
 
+        script = ""
        @gem_files.each do |gem_file|
          filename = File.basename(gem_file)
-          command = "#{transport_cmd} #{gem_file} #{filename}" if transport_cmd
-          command = yield(gem_file, filename, command) if blk
+          params = [transport_cmd, gem_file, filename]
+          if blk
+            command = yield(*params)
+          else
+            command = params.join(" ")
+          end
+
          script << %Q[
 # #{filename}
 #{command}
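A sketch of the per-target helpers documented in the class comment above; the gem file URL is a placeholder. Each helper emits one block per gem file, beginning with the transport command chosen by fetch_command, and a block can still override the command built for each gem.

  require 'pipely/deploy/bootstrap_context'

  context = Pipely::Deploy::BootstrapContext.new
  context.gem_files = ['s3://my-bucket/gems/my_pipeline-0.1.0.gem']

  context.emr.install_gems_script  # copies each gem with 'hadoop fs -copyToLocal'
  context.ec2.install_gems_script  # copies each gem with 'aws s3 cp'

  context.ec2.install_gems_script do |cmd, gem_file, filename|
    "#{cmd} #{gem_file} ./gems/#{filename}"
  end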
data/lib/pipely/deploy/bootstrap_registry.rb ADDED
@@ -0,0 +1,45 @@
+require 'singleton'
+require 'active_support/core_ext/string/conversions'
+
+module Pipely
+  module Deploy
+
+    #
+    ## Registry of Mixins to be applied to the bootstrap context
+    #
+    class BootstrapRegistry
+      include Singleton
+
+      def initialize
+        @mixins = []
+      end
+
+      class << self
+        def register_mixins(*mixins)
+          instance.register_mixins(*mixins)
+        end
+
+        def mixins
+          instance.mixins
+        end
+      end
+
+      def register_mixins(*mixins)
+        new_mixins = [mixins].flatten.compact
+
+        new_mixins.each do |mixin|
+          begin
+            require mixin.underscore
+          rescue LoadError => e
+            raise "Failed to require #{mixin} for bootstrap_contexts: #{e}"
+          end
+        end
+        @mixins = (@mixins + new_mixins).uniq
+      end
+
+      def mixins
+        @mixins
+      end
+    end
+  end
+end
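A sketch of the registry's contract; the module and file names are hypothetical. Each registered name is required by its underscored path (a LoadError is re-raised with context), and the accumulated list is deduplicated across calls.

  require 'pipely/deploy/bootstrap_registry'

  # requires 'my_org/emr_helpers' and 'my_org/ec2_helpers', raising a
  # descriptive error if either file cannot be loaded
  Pipely::Deploy::BootstrapRegistry.register_mixins(
    'MyOrg::EmrHelpers', 'MyOrg::Ec2Helpers'
  )

  Pipely::Deploy::BootstrapRegistry.mixins
  # => ["MyOrg::EmrHelpers", "MyOrg::Ec2Helpers"]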