gjallarhorn 0.1.0.alpha → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +12 -0
- data/README.md +116 -9
- data/examples/zero-downtime-deployment.rb +104 -0
- data/lib/gjallarhorn/adapter/aws.rb +652 -0
- data/lib/gjallarhorn/adapter/base.rb +125 -0
- data/lib/gjallarhorn/cli.rb +90 -4
- data/lib/gjallarhorn/configuration.rb +64 -8
- data/lib/gjallarhorn/deployer.rb +179 -9
- data/lib/gjallarhorn/deployment/basic.rb +171 -0
- data/lib/gjallarhorn/deployment/legacy.rb +40 -0
- data/lib/gjallarhorn/deployment/strategy.rb +189 -0
- data/lib/gjallarhorn/deployment/zero_downtime.rb +276 -0
- data/lib/gjallarhorn/history.rb +164 -0
- data/lib/gjallarhorn/proxy/kamal_proxy_manager.rb +36 -0
- data/lib/gjallarhorn/proxy/manager.rb +186 -0
- data/lib/gjallarhorn/proxy/nginx_manager.rb +362 -0
- data/lib/gjallarhorn/proxy/traefik_manager.rb +36 -0
- data/lib/gjallarhorn/version.rb +1 -1
- data/lib/gjallarhorn.rb +16 -0
- metadata +101 -6
- data/lib/gjallarhorn/adapters/aws.rb +0 -96
- data/lib/gjallarhorn/adapters/base.rb +0 -56
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "strategy"
|
|
4
|
+
|
|
5
|
+
module Gjallarhorn
|
|
6
|
+
module Deployment
|
|
7
|
+
# Basic deployment strategy
|
|
8
|
+
#
|
|
9
|
+
# Implements a simple deployment strategy that stops old containers
|
|
10
|
+
# and starts new ones without zero-downtime guarantees. This is the
|
|
11
|
+
# fallback strategy when zero-downtime features are not needed.
|
|
12
|
+
#
|
|
13
|
+
# @since 0.1.0
|
|
14
|
+
class Basic < Strategy
  # Deploy every service by replacing its containers in place.
  #
  # Services are handled one after another. For each service the running
  # containers are stopped and removed *before* the replacement is started,
  # so the service is unavailable in between.
  #
  # @param image [String] Container image to deploy
  # @param environment [String] Target environment
  # @param services [Array<Hash>] Services to deploy (string or symbol keys)
  # @return [void]
  def deploy(image:, environment:, services:)
    @logger.info "Starting basic deployment of #{image} to #{environment}"

    services.each do |service|
      # Accept string-keyed hashes (e.g. loaded from YAML), as ZeroDowntime#deploy does.
      service = service.transform_keys(&:to_sym) if service.is_a?(Hash)
      @logger.info "Deploying service: #{service[:name]}"
      deploy_service_basic(service, image, environment)
    end

    @logger.info "Basic deployment completed successfully"
  end

  # Check if strategy supports zero-downtime deployments
  #
  # @return [Boolean] Always false for basic strategy
  def zero_downtime?
    false
  end

  private

  # Deploy a single service: stop old containers, start the new one, wait
  # for it to run, then optionally wait for its health check.
  #
  # Errors raised while starting or waiting propagate to the caller; by that
  # point the previous containers have already been stopped.
  #
  # @param service [Hash] Service configuration (symbol keys)
  # @param image [String] Container image to deploy
  # @param environment [String] Target environment
  # @return [void]
  def deploy_service_basic(service, image, environment)
    service_name = service[:name]

    # Step 1: Stop existing containers
    @logger.info "Stopping existing containers for #{service_name}..."
    stop_existing_containers(service_name)

    # Step 2: Start new container
    new_container = start_new_container(service, image, environment)
    @logger.info "Started new container: #{new_container[:name]} (#{new_container[:id]})"

    # Step 3: Wait for container to be running
    wait_for_container_running(new_container)

    # Step 4: Optional health check
    if service[:healthcheck]
      @logger.info "Waiting for health check to pass..."
      wait_for_container_health(new_container, service[:healthcheck])
    end

    @logger.info "Service #{service_name} deployed successfully"
  end

  # Stop and remove all running containers for a service (best effort).
  #
  # Each container is handled independently, so a failure on one does not
  # prevent the remaining containers from being stopped. Failures are logged
  # as warnings and never abort the deployment.
  #
  # @param service_name [String] Service name
  # @return [void]
  def stop_existing_containers(service_name)
    current_containers = @adapter.get_running_containers(service_name)

    current_containers.each do |container|
      @logger.info "Stopping container: #{container[:name]} (#{container[:id]})"

      begin
        @adapter.stop_container(container[:id], graceful: true)
        @adapter.remove_container(container[:id])
      rescue StandardError => e
        @logger.warn "Failed to stop container #{container[:name]}: #{e.message}"
        # Keep going: the other containers still need to be stopped
      end
    end
  rescue StandardError => e
    # Reached when the container lookup itself fails
    @logger.warn "Failed to stop some existing containers: #{e.message}"
    # Continue with deployment even if cleanup fails
  end

  # Start a new container for the service
  #
  # @param service [Hash] Service configuration
  # @param image [String] Container image
  # @param environment [String] Target environment
  # @return [Hash] New container information as returned by the adapter
  def start_new_container(service, image, environment)
    container_name = generate_container_name(service[:name])

    container_config = {
      name: container_name,
      image: image,
      ports: service[:ports] || [],
      env: build_environment_variables(service, environment),
      volumes: service[:volumes] || [],
      command: service[:cmd],
      labels: build_container_labels(service, environment),
      restart_policy: service[:restart_policy] || "unless-stopped"
    }

    @adapter.start_container(container_config)
  end

  # Build environment variables for container
  #
  # The GJALLARHORN_* defaults come first; entries from service[:env] are
  # merged afterwards and therefore win on key collisions.
  #
  # @param service [Hash] Service configuration
  # @param environment [String] Target environment
  # @return [Hash] Environment variables
  def build_environment_variables(service, environment)
    env_vars = {
      "GJALLARHORN_SERVICE" => service[:name],
      "GJALLARHORN_ENVIRONMENT" => environment,
      "GJALLARHORN_DEPLOYED_AT" => deployed_at_timestamp
    }

    # Add service-specific environment variables
    env_vars.merge!(service[:env]) if service[:env]

    env_vars
  end

  # Build container labels for identification and management
  #
  # @param service [Hash] Service configuration
  # @param environment [String] Target environment
  # @return [Hash] Container labels
  def build_container_labels(service, environment)
    {
      "gjallarhorn.service" => service[:name],
      "gjallarhorn.environment" => environment,
      "gjallarhorn.role" => service[:role] || "web",
      "gjallarhorn.deployed_at" => deployed_at_timestamp,
      "gjallarhorn.strategy" => "basic"
    }
  end

  # Current UTC time in ISO 8601 form, e.g. "2025-01-31T12:00:00Z".
  #
  # Formatted with strftime so this class does not depend on the "time"
  # standard library, which older Rubies need for Time#iso8601.
  #
  # @return [String] UTC timestamp
  def deployed_at_timestamp
    Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")
  end

  # Poll the adapter every 2 seconds until the container reports "running".
  #
  # NOTE(review): DeploymentError is not defined in this file or in
  # strategy.rb — confirm it is loaded whenever this strategy is used.
  #
  # @param container [Hash] Container information
  # @param timeout [Integer] Timeout in seconds
  # @raise [DeploymentError] If the container is not running within timeout
  # @return [Boolean] True when container is running
  def wait_for_container_running(container, timeout = 60)
    start_time = Time.now

    loop do
      status = @adapter.get_container_status(container[:id])

      if status == "running"
        @logger.info "Container #{container[:name]} is running"
        return true
      end

      elapsed = Time.now - start_time
      if elapsed >= timeout
        raise DeploymentError,
              "Container #{container[:name]} failed to start within #{timeout}s (status: #{status})"
      end

      @logger.debug "Container #{container[:name]} status: #{status}, waiting..."
      sleep 2
    end
  end
end
|
|
170
|
+
end
|
|
171
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "strategy"
|
|
4
|
+
|
|
5
|
+
module Gjallarhorn
|
|
6
|
+
module Deployment
|
|
7
|
+
# Legacy deployment strategy
|
|
8
|
+
#
|
|
9
|
+
# Maintains backward compatibility with the original deployment interface
|
|
10
|
+
# by calling the adapter's deploy method directly. This is used for
|
|
11
|
+
# testing and for adapters that haven't been updated to the new
|
|
12
|
+
# container management interface.
|
|
13
|
+
#
|
|
14
|
+
# @since 0.1.0
|
|
15
|
+
class Legacy < Strategy
  # Hand the whole deployment over to the adapter's own +deploy+ method.
  #
  # No container management happens here; the arguments are forwarded
  # unchanged and the adapter's return value is returned.
  #
  # @param image [String] Container image to deploy
  # @param environment [String] Target environment
  # @param services [Array<Hash>] Services to deploy
  # @return [void]
  def deploy(image:, environment:, services:)
    logger.info("Using legacy deployment interface")

    forwarded = { image: image, environment: environment, services: services }
    adapter.deploy(**forwarded)
  end

  # Whether this strategy can deploy without downtime.
  #
  # @return [Boolean] Always false for legacy strategy
  def zero_downtime?
    false
  end
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "securerandom"
|
|
4
|
+
|
|
5
|
+
module Gjallarhorn
|
|
6
|
+
module Deployment
|
|
7
|
+
# Base class for deployment strategies
|
|
8
|
+
#
|
|
9
|
+
# Defines the interface that all deployment strategies must implement.
|
|
10
|
+
# Each strategy handles the specifics of how containers are deployed,
|
|
11
|
+
# updated, and managed during the deployment process.
|
|
12
|
+
#
|
|
13
|
+
# @abstract Subclass and override deployment methods
|
|
14
|
+
# @since 0.1.0
|
|
15
|
+
class Strategy
  attr_reader :adapter, :proxy_manager, :logger

  # Initialize a deployment strategy
  #
  # @param adapter [Adapter::Base] The cloud adapter to use
  # @param proxy_manager [Proxy::Manager] The proxy manager for traffic switching
  # @param logger [Logger, nil] Logger instance; a $stdout logger is created when nil
  def initialize(adapter, proxy_manager, logger = nil)
    @adapter = adapter
    @proxy_manager = proxy_manager
    @logger = logger || build_default_logger
  end

  # Deploy services using this strategy
  #
  # @param image [String] Container image to deploy
  # @param environment [String] Target environment
  # @param services [Array<Hash>] Services to deploy
  # @abstract Subclasses must implement this method
  # @raise [NotImplementedError] If not implemented by subclass
  # @return [void]
  def deploy(image:, environment:, services:)
    raise NotImplementedError, "Subclasses must implement deploy method"
  end

  # Check if strategy supports zero-downtime deployments
  #
  # @return [Boolean] True if strategy supports zero-downtime
  def zero_downtime?
    false
  end

  # Get strategy name: the unqualified class name, lower-cased
  # (e.g. "basic" for Gjallarhorn::Deployment::Basic).
  #
  # @return [String] Strategy name
  def name
    self.class.name.split("::").last.downcase
  end

  protected

  # Poll a container's health check until it passes or the budget runs out.
  #
  # The budget is whichever is hit first: healthcheck[:max_attempts]
  # (default 30) or +timeout+ seconds. Checks are spaced by
  # healthcheck[:interval] seconds (default 3). The elapsed time is only
  # evaluated between checks, so a single slow check can overrun +timeout+.
  #
  # @param container [Hash] Container information
  # @param healthcheck [Hash] Health check configuration
  # @param timeout [Integer] Timeout in seconds
  # @raise [HealthCheckTimeoutError] If health check times out
  # @return [Boolean] True when container is healthy
  def wait_for_container_health(container, healthcheck, timeout = 300)
    max_attempts = healthcheck[:max_attempts] || 30
    interval = healthcheck[:interval] || 3
    start_time = Time.now

    attempts = 0

    loop do
      # Count the check that is about to run so log messages report the
      # real number of checks performed (a first-try success is 1 attempt).
      attempts += 1

      if container_healthy?(container, healthcheck)
        @logger.info "Container #{container[:name]} passed health check after #{attempts} attempts"
        return true
      end

      elapsed = Time.now - start_time

      if attempts >= max_attempts || elapsed >= timeout
        raise HealthCheckTimeoutError,
              "Container #{container[:name]} failed health check after #{attempts} attempts (#{elapsed.round(1)}s)"
      end

      @logger.debug "Health check attempt #{attempts}/#{max_attempts} failed, retrying in #{interval}s..."
      sleep interval
    end
  end

  # Run one health check, dispatching on healthcheck[:type].
  #
  # A missing type means "http"; unknown types log a warning and fall back
  # to the HTTP check.
  #
  # @param container [Hash] Container information
  # @param healthcheck [Hash] Health check configuration
  # @return [Boolean] True if container is healthy
  def container_healthy?(container, healthcheck)
    case healthcheck[:type]
    when "http", nil
      http_health_check(container, healthcheck)
    when "command"
      command_health_check(container, healthcheck)
    when "docker"
      docker_health_check(container)
    else
      @logger.warn "Unknown health check type: #{healthcheck[:type]}, defaulting to HTTP"
      http_health_check(container, healthcheck)
    end
  end

  # Perform HTTP health check
  #
  # Issues a GET to http://<host>:<port><path>, where host is the
  # container's :ip, then :host, then "localhost"; port defaults to 3000 and
  # path to "/health". The check passes when the response code is one of
  # healthcheck[:expected_status] (default 200 or 204). Any exception
  # (connection refused, timeout, ...) counts as a failed check.
  #
  # @param container [Hash] Container information
  # @param healthcheck [Hash] Health check configuration
  # @return [Boolean] True if HTTP check passes
  def http_health_check(container, healthcheck)
    # Loaded lazily: only strategies that actually run HTTP checks need them
    require "net/http"
    require "uri"

    path = healthcheck[:path] || "/health"
    port = healthcheck[:port] || 3000
    expected_status = healthcheck[:expected_status] || [200, 204]
    expected_status = [expected_status] unless expected_status.is_a?(Array)

    # Try to get container IP or use localhost if running locally
    host = container[:ip] || container[:host] || "localhost"
    url = "http://#{host}:#{port}#{path}"

    begin
      response = Net::HTTP.get_response(URI(url))
      status_ok = expected_status.include?(response.code.to_i)

      if status_ok
        @logger.debug "Health check #{url} returned #{response.code}"
      else
        # Surface the mismatch so a failing check can be diagnosed from logs
        @logger.debug "Health check #{url} returned unexpected status #{response.code}"
      end

      status_ok
    rescue StandardError => e
      @logger.debug "Health check failed: #{e.message}"
      false
    end
  end

  # Perform command-based health check
  #
  # Runs healthcheck[:command] (or :cmd) through the adapter. The check
  # passes when the adapter call does not raise; its return value is ignored.
  # NOTE(review): this assumes the adapter raises on a non-zero exit — confirm.
  #
  # @param container [Hash] Container information
  # @param healthcheck [Hash] Health check configuration
  # @return [Boolean] True if command succeeds, false if none is configured
  def command_health_check(container, healthcheck)
    command = healthcheck[:command] || healthcheck[:cmd]
    return false unless command

    begin
      @adapter.execute_in_container(container[:id], command)
      true
    rescue StandardError => e
      @logger.debug "Command health check failed: #{e.message}"
      false
    end
  end

  # Perform Docker health check
  #
  # Returns whatever the adapter's get_container_health reports, or false
  # when the adapter raises.
  #
  # @param container [Hash] Container information
  # @return [Boolean] True if Docker reports container as healthy
  def docker_health_check(container)
    @adapter.get_container_health(container[:id])
  rescue StandardError => e
    @logger.debug "Docker health check failed: #{e.message}"
    false
  end

  # Generate unique container name: "<service>-<YYYYmmdd-HHMMSS>-<8 hex chars>"
  #
  # @param service_name [String] Base service name
  # @return [String] Unique container name
  def generate_container_name(service_name)
    "#{service_name}-#{generate_version_suffix}-#{SecureRandom.hex(4)}"
  end

  # Generate version suffix for tracking (local time, "YYYYmmdd-HHMMSS")
  #
  # @return [String] Version suffix
  def generate_version_suffix
    Time.now.strftime("%Y%m%d-%H%M%S")
  end

  private

  # Build the fallback logger used when the caller supplies none.
  #
  # The require is local because this file does not otherwise load "logger".
  #
  # @return [Logger] Logger writing to $stdout
  def build_default_logger
    require "logger"
    Logger.new($stdout)
  end
end
|
|
185
|
+
|
|
186
|
+
# Raised when health checks timeout
|
|
187
|
+
# Raised by Strategy#wait_for_container_health once the attempt or time
# budget is exhausted without a passing check.
# NOTE(review): `Error` is not defined in this file — presumably
# Gjallarhorn::Error; confirm it is loaded before this file.
class HealthCheckTimeoutError < Error; end
|
|
188
|
+
end
|
|
189
|
+
end
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "strategy"
|
|
4
|
+
|
|
5
|
+
module Gjallarhorn
|
|
6
|
+
module Deployment
|
|
7
|
+
# Zero-downtime deployment strategy
|
|
8
|
+
#
|
|
9
|
+
# Implements zero-downtime deployments by:
|
|
10
|
+
# 1. Starting new containers alongside existing ones
|
|
11
|
+
# 2. Waiting for new containers to pass health checks
|
|
12
|
+
# 3. Switching proxy traffic to new containers
|
|
13
|
+
# 4. Gracefully stopping old containers
|
|
14
|
+
#
|
|
15
|
+
# This ensures continuous service availability during deployments.
|
|
16
|
+
#
|
|
17
|
+
# @since 0.1.0
|
|
18
|
+
class ZeroDowntime < Strategy
  # Deploy services with zero downtime
  #
  # Services are deployed one after another; the first failure raises and
  # the remaining services are not attempted.
  #
  # @param image [String] Container image to deploy
  # @param environment [String] Target environment
  # @param services [Array<Hash>] Services to deploy (string or symbol keys)
  # @raise [DeploymentError] If any service fails to deploy
  # @return [Array<Hash>] One result hash per service
  def deploy(image:, environment:, services:)
    @logger.info "Starting zero-downtime deployment of #{image} to #{environment}"

    # Set the environment on the adapter so it knows which environment we're deploying to
    @adapter.set_environment(environment) if @adapter.respond_to?(:set_environment)

    deployment_results = services.map do |service|
      # Convert string keys to symbols for consistency
      service = service.transform_keys(&:to_sym) if service.is_a?(Hash)
      @logger.info "Deploying service: #{service[:name]}"
      deploy_service_zero_downtime(service, image, environment)
    end

    @logger.info "Zero-downtime deployment completed successfully"
    deployment_results
  end

  # Check if strategy supports zero-downtime deployments
  #
  # @return [Boolean] Always true for this strategy
  def zero_downtime?
    true
  end

  private

  # Deploy a single service with zero downtime
  #
  # Starts the new container next to the existing ones, waits until it is
  # healthy (or merely running when no health check is configured), switches
  # proxy traffic, then stops and prunes the old containers. On any failure
  # the new container is stopped (best effort) and the error is re-raised as
  # DeploymentError.
  #
  # @param service [Hash] Service configuration
  # @param image [String] Container image to deploy
  # @param environment [String] Target environment
  # @raise [DeploymentError] If any step fails
  # @return [Hash] Deployment result (:service, :old_containers, :new_container, :status)
  def deploy_service_zero_downtime(service, image, environment)
    # Ensure service hash uses symbol keys
    service = service.transform_keys(&:to_sym) if service.is_a?(Hash)
    service_name = service[:name]

    # Step 1: Get current running containers
    current_containers = get_current_containers(service_name)
    @logger.info "Found #{current_containers.length} existing containers for #{service_name}"

    # Step 2: Start new container
    new_container = start_new_container(service, image, environment)
    @logger.info "Started new container: #{new_container[:name]} (#{new_container[:id]})"

    # Step 3: Wait for new container to be healthy
    if service[:healthcheck]
      @logger.info "Waiting for health check to pass..."
      wait_for_container_health(new_container, service[:healthcheck])
    else
      @logger.info "No health check configured, waiting for container to be running..."
      wait_for_container_running(new_container)
    end

    # Step 4: Update proxy routing to new container
    if @proxy_manager
      @logger.info "Switching proxy traffic to new container..."
      @proxy_manager.switch_traffic(
        service_name: service_name,
        from_containers: current_containers,
        to_container: new_container
      )
    else
      @logger.warn "No proxy manager configured, skipping traffic switch"
    end

    # Step 5: Gracefully stop old containers
    if current_containers.any?
      @logger.info "Stopping #{current_containers.length} old containers..."
      stop_old_containers(current_containers, service[:drain_timeout] || 30)
    end

    # Step 6: Clean up old containers
    cleanup_old_containers(service_name, new_container[:id])

    {
      service: service_name,
      old_containers: current_containers.map { |c| c[:id] },
      new_container: new_container[:id],
      status: "success"
    }
  rescue StandardError => e
    @logger.error "Failed to deploy #{service_name}: #{e.message}"

    # Cleanup: stop the new container if it was started before the failure.
    # new_container is nil when the failure happened before Step 2 finished.
    discard_failed_container(new_container) if new_container

    raise DeploymentError, "Zero-downtime deployment failed for #{service_name}: #{e.message}"
  end

  # Force-stop the container of a failed deployment (best effort).
  #
  # Errors are logged and swallowed so they cannot replace the original
  # deployment failure that the caller is about to report.
  #
  # @param container [Hash] Container information
  # @return [void]
  def discard_failed_container(container)
    @logger.info "Cleaning up failed deployment container..."
    @adapter.stop_container(container[:id], graceful: false)
  rescue StandardError => e
    @logger.warn "Failed to clean up container #{container[:name]}: #{e.message}"
  end

  # Get currently running containers for a service
  #
  # Lookup failures are logged and treated as "no existing containers".
  #
  # @param service_name [String] Service name
  # @return [Array<Hash>] Array of container information
  def get_current_containers(service_name)
    @logger.debug "get_current_containers: Calling adapter.get_running_containers for service: #{service_name}"
    @adapter.get_running_containers(service_name)
  rescue StandardError => e
    @logger.warn "Failed to get current containers for #{service_name}: #{e.message}"
    @logger.debug "get_current_containers error backtrace: #{e.backtrace.join("\n")}"
    []
  end

  # Start a new container for the service
  #
  # @param service [Hash] Service configuration
  # @param image [String] Container image
  # @param environment [String] Target environment
  # @raise [StandardError] Re-raises whatever the adapter raised, after logging it
  # @return [Hash] New container information as returned by the adapter
  def start_new_container(service, image, environment)
    container_name = generate_container_name(service[:name])

    container_config = {
      name: container_name,
      image: image,
      ports: service[:ports] || [],
      env: build_environment_variables(service, environment),
      volumes: service[:volumes] || [],
      command: service[:cmd],
      labels: build_container_labels(service, environment),
      restart_policy: service[:restart_policy] || "unless-stopped"
    }

    # Log variable names only: values frequently hold credentials
    loggable_config = container_config.merge(env: container_config[:env].keys)
    @logger.debug "start_new_container: Calling adapter.start_container with config: #{loggable_config.inspect}"
    begin
      result = @adapter.start_container(container_config)
      @logger.debug "start_new_container: Result: #{result.inspect}"
      result
    rescue StandardError => e
      @logger.error "start_new_container failed: #{e.message}"
      @logger.debug "start_new_container error backtrace: #{e.backtrace.join("\n")}"
      raise
    end
  end

  # Build environment variables for container
  #
  # The GJALLARHORN_* defaults come first; the service's own env entries are
  # merged afterwards and therefore win on key collisions.
  #
  # @param service [Hash] Service configuration
  # @param environment [String] Target environment
  # @return [Hash] Environment variables
  def build_environment_variables(service, environment)
    env_vars = {
      "GJALLARHORN_SERVICE" => service[:name],
      "GJALLARHORN_ENVIRONMENT" => environment,
      "GJALLARHORN_DEPLOYED_AT" => deployed_at_timestamp
    }

    # Add service-specific environment variables
    # Handle both string and symbol keys from YAML
    service_env = service[:env] || service["env"]
    env_vars.merge!(service_env) if service_env

    env_vars
  end

  # Build container labels for identification and management
  #
  # @param service [Hash] Service configuration
  # @param environment [String] Target environment
  # @return [Hash] Container labels
  def build_container_labels(service, environment)
    {
      "gjallarhorn.service" => service[:name],
      "gjallarhorn.environment" => environment,
      "gjallarhorn.role" => service[:role] || "web",
      "gjallarhorn.deployed_at" => deployed_at_timestamp,
      "gjallarhorn.strategy" => "zero_downtime"
    }
  end

  # Current UTC time in ISO 8601 form, e.g. "2025-01-31T12:00:00Z".
  #
  # Formatted with strftime so this class does not depend on the "time"
  # standard library, which older Rubies need for Time#iso8601.
  #
  # @return [String] UTC timestamp
  def deployed_at_timestamp
    Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")
  end

  # Poll the adapter every 2 seconds until the container reports "running".
  #
  # @param container [Hash] Container information
  # @param timeout [Integer] Timeout in seconds
  # @raise [DeploymentError] If the container is not running within timeout
  # @return [Boolean] True when container is running
  def wait_for_container_running(container, timeout = 60)
    start_time = Time.now

    loop do
      status = @adapter.get_container_status(container[:id])

      if status == "running"
        @logger.info "Container #{container[:name]} is running"
        return true
      end

      elapsed = Time.now - start_time
      if elapsed >= timeout
        raise DeploymentError,
              "Container #{container[:name]} failed to start within #{timeout}s (status: #{status})"
      end

      @logger.debug "Container #{container[:name]} status: #{status}, waiting..."
      sleep 2
    end
  end

  # Gracefully stop old containers
  #
  # Each container is handled independently; a failure is logged and the
  # remaining containers are still stopped.
  #
  # @param containers [Array<Hash>] Containers to stop
  # @param drain_timeout [Integer] Time to wait for graceful shutdown
  # @return [void]
  def stop_old_containers(containers, drain_timeout = 30)
    containers.each do |container|
      @logger.info "Stopping container: #{container[:name]} (#{container[:id]})"

      begin
        # Give container time to finish current requests
        @adapter.stop_container(container[:id], graceful: true, timeout: drain_timeout)
        @logger.info "Successfully stopped container: #{container[:name]}"
      rescue StandardError => e
        @logger.error "Failed to stop container #{container[:name]}: #{e.message}"
        # Continue with other containers even if one fails
      end
    end
  end

  # Clean up old containers, keeping a configurable number for rollback
  #
  # The +keep_count+ most recently created containers (by :created_at),
  # excluding +exclude_container_id+, are kept; older ones are removed. Each
  # removal is attempted independently, and no failure here ever fails the
  # deployment.
  #
  # @param service_name [String] Service name
  # @param exclude_container_id [String] Container ID to exclude from cleanup
  # @param keep_count [Integer] Number of old containers to keep
  # @return [void]
  def cleanup_old_containers(service_name, exclude_container_id, keep_count = 2)
    all_containers = @adapter.get_all_containers(service_name)
    old_containers = all_containers.reject { |c| c[:id] == exclude_container_id }

    # Sort by creation time (newest first) and keep only the specified count
    containers_to_remove = old_containers.sort_by { |c| c[:created_at] }.reverse.drop(keep_count)

    removed = 0
    containers_to_remove.each do |container|
      @logger.info "Removing old container: #{container[:name]} (#{container[:id]})"

      begin
        @adapter.remove_container(container[:id])
        removed += 1
      rescue StandardError => e
        @logger.warn "Failed to remove container #{container[:name]}: #{e.message}"
        # Keep pruning the remaining containers
      end
    end

    @logger.info "Cleaned up #{removed} old containers" if removed.positive?
  rescue StandardError => e
    # Reached when listing or sorting the containers fails
    @logger.warn "Failed to cleanup old containers: #{e.message}"
    # Don't fail deployment if cleanup fails
  end
end
|
|
272
|
+
|
|
273
|
+
# Raised when deployment operations fail
|
|
274
|
+
# Raised by ZeroDowntime when a service deployment fails or a container does
# not reach the "running" state in time. Basic#wait_for_container_running
# raises it as well.
# NOTE(review): basic.rb only requires "strategy", so this file must be
# loaded before Basic can raise this error — confirm the load order.
# NOTE(review): `Error` is not defined in this file — presumably
# Gjallarhorn::Error; confirm.
class DeploymentError < Error; end
|
|
275
|
+
end
|
|
276
|
+
end
|