container_broker 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +98 -0
- data/Rakefile +8 -0
- data/app/controllers/application_controller.rb +5 -0
- data/app/controllers/healthcheck_controller.rb +21 -0
- data/app/controllers/nodes_controller.rb +70 -0
- data/app/controllers/nodes_healthcheck_controller.rb +28 -0
- data/app/controllers/status_controller.rb +48 -0
- data/app/controllers/tasks_controller.rb +83 -0
- data/app/controllers/tasks_healthcheck_controller.rb +28 -0
- data/app/jobs/add_task_tags_job.rb +13 -0
- data/app/jobs/adjust_node_slots_job.rb +27 -0
- data/app/jobs/application_job.rb +9 -0
- data/app/jobs/collect_load_metrics_job.rb +9 -0
- data/app/jobs/container_broker_base_job.rb +32 -0
- data/app/jobs/migrate_tasks_from_dead_node_job.rb +32 -0
- data/app/jobs/monitor_unresponsive_node_job.rb +21 -0
- data/app/jobs/monitor_unresponsive_nodes_job.rb +9 -0
- data/app/jobs/release_slot_job.rb +47 -0
- data/app/jobs/remove_runner_job.rb +11 -0
- data/app/jobs/remove_unused_tags_job.rb +25 -0
- data/app/jobs/request_id_from_task.rb +7 -0
- data/app/jobs/run_task_job.rb +64 -0
- data/app/jobs/run_tasks_for_all_execution_types_job.rb +11 -0
- data/app/jobs/run_tasks_job.rb +42 -0
- data/app/jobs/timeout_failed_tasks_job.rb +31 -0
- data/app/jobs/update_all_nodes_status_job.rb +9 -0
- data/app/jobs/update_node_status_job.rb +24 -0
- data/app/jobs/update_task_status_job.rb +71 -0
- data/app/models/mongoid_serializable_model.rb +14 -0
- data/app/models/node.rb +101 -0
- data/app/models/slot.rb +42 -0
- data/app/models/task.rb +148 -0
- data/app/models/task_tag.rb +11 -0
- data/app/observers/observable.rb +23 -0
- data/app/observers/task_observer.rb +11 -0
- data/app/serializers/node_healthcheck_serializer.rb +5 -0
- data/app/serializers/node_serializer.rb +5 -0
- data/app/serializers/status_panel_node_serializer.rb +9 -0
- data/app/serializers/status_panel_slot_serializer.rb +5 -0
- data/app/serializers/status_panel_task_serializer.rb +16 -0
- data/app/serializers/task_healthcheck_serializer.rb +5 -0
- data/app/serializers/task_serializer.rb +7 -0
- data/app/services/adjust_execution_type_slots.rb +51 -0
- data/app/services/check_for_slot_removal.rb +28 -0
- data/app/services/collect_load_metrics.rb +40 -0
- data/app/services/delete_node.rb +25 -0
- data/app/services/friendly_name_nodes.rb +10 -0
- data/app/services/friendly_name_slots.rb +15 -0
- data/app/services/kill_node_runners.rb +17 -0
- data/app/services/kill_task_container.rb +29 -0
- data/app/services/kubernetes_client.rb +136 -0
- data/app/services/least_used_node.rb +44 -0
- data/app/services/lock_manager.rb +74 -0
- data/app/services/lock_slot.rb +37 -0
- data/app/services/lock_task.rb +45 -0
- data/app/services/metrics.rb +43 -0
- data/app/services/migrate_runner.rb +26 -0
- data/app/services/node_task_acceptance.rb +18 -0
- data/app/services/node_usage_percentage_per_execution_type.rb +22 -0
- data/app/services/reschedule_tasks_for_missing_runners.rb +70 -0
- data/app/services/runners.rb +4 -0
- data/app/services/runners/docker/create_connection.rb +18 -0
- data/app/services/runners/docker/create_execution_info.rb +87 -0
- data/app/services/runners/docker/fetch_execution_info.rb +17 -0
- data/app/services/runners/docker/fetch_logs.rb +18 -0
- data/app/services/runners/docker/fetch_task_container.rb +15 -0
- data/app/services/runners/docker/filer.rb +19 -0
- data/app/services/runners/docker/kill_slot_runner.rb +19 -0
- data/app/services/runners/docker/node_availability.rb +11 -0
- data/app/services/runners/docker/remove_runner.rb +18 -0
- data/app/services/runners/docker/run_task.rb +63 -0
- data/app/services/runners/docker/update_node_status.rb +62 -0
- data/app/services/runners/execution_info.rb +49 -0
- data/app/services/runners/invalid_config.rb +5 -0
- data/app/services/runners/invalid_runner.rb +5 -0
- data/app/services/runners/kubernetes/create_client.rb +29 -0
- data/app/services/runners/kubernetes/create_execution_info.rb +103 -0
- data/app/services/runners/kubernetes/fetch_execution_info.rb +15 -0
- data/app/services/runners/kubernetes/fetch_logs.rb +17 -0
- data/app/services/runners/kubernetes/filer.rb +41 -0
- data/app/services/runners/kubernetes/kill_slot_runner.rb +11 -0
- data/app/services/runners/kubernetes/node_availability.rb +11 -0
- data/app/services/runners/kubernetes/remove_runner.rb +19 -0
- data/app/services/runners/kubernetes/run_task.rb +54 -0
- data/app/services/runners/kubernetes/update_node_status.rb +64 -0
- data/app/services/runners/runner_id_not_found_error.rb +5 -0
- data/app/services/runners/services_factory.rb +38 -0
- data/app/services/runners/update_node_status_helper.rb +43 -0
- data/app/services/slots_usage_percentage.rb +18 -0
- data/config/application.rb +34 -0
- data/config/boot.rb +5 -0
- data/config/environment.rb +7 -0
- data/config/environments/test.rb +44 -0
- data/config/initializers/application_controller_renderer.rb +10 -0
- data/config/initializers/backtrace_silencers.rb +9 -0
- data/config/initializers/config.rb +51 -0
- data/config/initializers/cookies_serializer.rb +7 -0
- data/config/initializers/docker_config.rb +3 -0
- data/config/initializers/filter_parameter_logging.rb +6 -0
- data/config/initializers/idempotent_request.rb +12 -0
- data/config/initializers/inflections.rb +18 -0
- data/config/initializers/mime_types.rb +6 -0
- data/config/initializers/mongoid.rb +3 -0
- data/config/initializers/new_framework_defaults_6_0.rb +47 -0
- data/config/initializers/raven.rb +10 -0
- data/config/initializers/sidekiq.rb +24 -0
- data/config/initializers/wrap_parameters.rb +16 -0
- data/config/locales/en.yml +33 -0
- data/config/mongoid.yml +10 -0
- data/config/routes.rb +43 -0
- data/config/secrets.yml +35 -0
- data/config/settings.yml +34 -0
- data/config/settings/test.yml +27 -0
- data/config/sidekiq_scheduler.yml +18 -0
- data/config/spring.rb +8 -0
- data/lib/constants.rb +12 -0
- data/lib/container_broker.rb +30 -0
- data/lib/container_broker/engine.rb +6 -0
- data/lib/container_broker/version.rb +5 -0
- data/lib/current_thread_request_id.rb +19 -0
- data/lib/idempotent_request/callback.rb +25 -0
- data/lib/idempotent_request/policy.rb +15 -0
- data/lib/redis_url_parser.rb +25 -0
- data/lib/tasks/task.rake +34 -0
- metadata +590 -0
@@ -0,0 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Root Active Job class of the application; ContainerBrokerBaseJob derives from it.
# Both options below are commented out, so this class configures no retry or
# discard policy of its own.
class ApplicationJob < ActiveJob::Base
|
4
|
+
# Automatically retry jobs that encountered a deadlock
|
5
|
+
# retry_on ActiveRecord::Deadlocked
|
6
|
+
|
7
|
+
# Most jobs are safe to ignore if the underlying records are no longer available
|
8
|
+
# discard_on ActiveJob::DeserializationError
|
9
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Shared parent of the broker's jobs.
#
# Registers two around-perform callbacks:
# * the first times the job and reports the run through `Metrics`;
# * the second, when the job class yields a request id for the first job
#   argument, tags the Rails log output and sets `CurrentThreadRequestId`
#   for the duration of the job.
class ContainerBrokerBaseJob < ApplicationJob
  JOB_METRIC = "jobs"

  # Measure wall-clock duration of the job and publish it with job metadata.
  # The metric is emitted after the timed call returns, so a job that raises
  # does not reach the `count` call.
  around_perform do |job, block|
    elapsed = Benchmark.realtime { block.call }

    recorder = Metrics.new(JOB_METRIC)
    recorder.count(
      job_id: job.job_id,
      job_class: job.class.to_s,
      executions: job.executions,
      queue_name: job.queue_name,
      hostname: Socket.gethostname,
      time: elapsed
    )
  end

  # Propagate the request id (if the job class provides one) to the logger
  # tags and to CurrentThreadRequestId while the job body runs.
  around_perform do |job, block|
    first_argument = job.arguments.first
    request_id = job.class.request_id_from_args(first_argument)

    if request_id
      log_tag = " request_id=#{request_id} "
      Rails.logger.tagged(log_tag) do
        CurrentThreadRequestId.set(request_id) { block.call }
      end
    else
      block.call
    end
  end

  # Default hook: no request id is derived from the arguments.
  # Job classes in this file that `extend RequestIdFromTask` presumably
  # replace this hook — confirm against that module.
  #
  # @param _args the first argument the job was enqueued with (ignored here)
  # @return [nil]
  def self.request_id_from_args(_args)
    nil
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Frees the non-available slots of a node that is not available: each slot's
# current task is marked for retry when it is starting or started, the slot's
# runner is handed to MigrateRunner, and the slot is released.
class MigrateTasksFromDeadNodeJob < ContainerBrokerBaseJob
  queue_as :default

  # @param node the node whose occupied slots should be migrated; nothing is
  #   done when it reports itself available again
  def perform(node:)
    if node.available?
      Rails.logger.debug("Not migrating tasks because #{node} returned to available status")
      return
    end

    # NOTE(review): presumably the block is skipped when the node lock is
    # already held elsewhere — confirm in Node#run_with_lock_no_wait.
    node.run_with_lock_no_wait do
      Rails.logger.debug("Migrating tasks from #{node}")

      occupied_slots = node.slots.reject(&:available?)
      occupied_slots.each do |occupied|
        Rails.logger.debug("Migrating task for #{occupied}")

        running_task = occupied.current_task
        if running_task
          Rails.logger.debug("Retrying slot current task #{running_task}")
          in_flight = running_task.starting? || running_task.started?
          running_task.mark_as_retry if in_flight
        else
          Rails.logger.debug("Slot does not have current task")
        end

        MigrateRunner.new(runner_id: occupied.runner_id).migrate

        Rails.logger.debug("Releasing #{occupied}")
        occupied.release
        Rails.logger.debug("#{occupied} released")
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Probes a node through its `:node_availability` runner service and, when the
# probe does not raise, marks the node available and triggers task scheduling.
class MonitorUnresponsiveNodeJob < ContainerBrokerBaseJob
  queue_as :default

  # @param node the node to probe
  #
  # Any StandardError raised inside the locked section is recorded on the
  # node via `register_error` and is not re-raised.
  def perform(node:)
    node.run_with_lock_no_wait do
      availability_probe = node.runner_service(:node_availability)
      availability_probe.perform(node: node)

      Rails.logger.debug("Marking #{node} as available again")
      node.register_success
      node.available!

      RunTasksForAllExecutionTypesJob.perform_later
    end
  rescue StandardError => error
    failure_description = "#{error.class}: #{error.message}"
    node.register_error(failure_description)

    Rails.logger.info("#{node} still unresponsive")
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Finishes the bookkeeping for a slot whose runner ended: refreshes the task
# status, optionally enqueues removal of the runner, and then either leaves
# the slot removed or releases it.
class ReleaseSlotJob < ContainerBrokerBaseJob
  # Raised when the runner id passed to the job differs from the slot's.
  class InvalidSlotContainerId < StandardError; end
  queue_as :default

  # @param slot the slot to release
  # @param runner_id the runner id the caller believes is bound to the slot
  # @raise [InvalidSlotContainerId] when `runner_id` differs from `slot.runner_id`
  #
  # A Runners::RunnerIdNotFoundError is logged and swallowed; any other
  # StandardError is registered on the slot's node and re-raised.
  def perform(slot:, runner_id:)
    Rails.logger.debug("ReleaseSlotJob for #{slot} and container #{runner_id}")

    already_migrated = MigrateRunner.new(runner_id: runner_id).migrated?
    if already_migrated
      Rails.logger.debug("Ignores release slot for #{slot} because it's migrated")
      return
    end

    check_same_runner_id(slot: slot, runner_id: runner_id)

    finished_task = slot.current_task.reload
    UpdateTaskStatusJob.perform_now(finished_task)

    Rails.logger.debug("Enqueueing container removal")
    if Settings.delete_container_after_run
      RemoveRunnerJob.perform_later(
        node: MongoidSerializableModel.new(slot.node),
        runner_id: slot.runner_id
      )
    end

    removal = CheckForSlotRemoval.new(slot: slot)
    removal.perform
    if removal.removed?
      Rails.logger.debug("Slot removed and wont be released")
    else
      slot.release
      Rails.logger.debug("Slot released (#{slot.status})")
    end
  rescue Runners::RunnerIdNotFoundError => e
    Rails.logger.debug("Runner #{runner_id} not found (#{e.message}). Task will be rescheduled in UpdateNodeStatus.")
  rescue StandardError => e
    Rails.logger.debug("Error in ReleaseSlotJob for #{slot}: #{e}")
    slot.node.register_error(e.message)
    raise
  end

  # Verifies that the slot is still bound to the runner the job was asked to
  # release; logs and raises otherwise.
  #
  # @raise [InvalidSlotContainerId] on mismatch
  def check_same_runner_id(slot:, runner_id:)
    slot_runner_id = slot.runner_id
    return if runner_id == slot_runner_id

    error_message = "Current container id (#{slot_runner_id}) in #{slot} is different than the provided (#{runner_id})"

    Rails.logger.error(error_message)

    raise InvalidSlotContainerId, error_message
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Destroys every TaskTag whose name is not present as a key under `tags`
# in any Task document.
class RemoveUnusedTagsJob < ContainerBrokerBaseJob
  queue_as :default

  # Job arguments are accepted and ignored.
  def perform(*_args)
    remove_unreferenced_tags
  end

  # Loads all tags, keeps those no task references, and destroys them.
  # The full list of orphans is computed before the first destroy.
  def remove_unreferenced_tags
    orphaned_tags = TaskTag.all.to_a.reject do |task_tag|
      any_task_referencing_tag?(task_tag)
    end

    orphaned_tags.each(&:destroy!)
  end

  # @return [Symbol] the dotted field path for the tag inside a task's `tags`
  def tag_expression(task_tag)
    "tags.#{task_tag.name}".to_sym
  end

  # @return [Boolean] whether at least one Task has the tag's field set
  def any_task_referencing_tag?(task_tag)
    tag_field = tag_expression(task_tag)
    Task.where(tag_field.exists => true).exists?
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Starts a task on a slot: assigns a fresh runner id to both, invokes the
# node's `:run_task` runner service, and flips task and slot to their
# started/running states. On any StandardError the slot is released and the
# task is marked for retry.
class RunTaskJob < ContainerBrokerBaseJob
  extend RequestIdFromTask

  queue_as :default

  # @param task the task to start; must report `starting?`
  # @param slot the slot to run it on; must report `attaching?`
  # @return the task on success; errors are handled in the rescue branch and
  #   are not re-raised
  def perform(task:, slot:)
    # TODO: remove after successful deploy
    legacy_mount = task.attributes["ingest_storage_mount"]
    if legacy_mount && task.storage_mounts.blank?
      task.update!(storage_mounts: { "ingest-nfs" => legacy_mount })
    end

    Rails.logger.debug("Performing RunTaskJob for #{task} #{slot}")

    raise "Invalid task status - #{task}" unless task.starting?
    raise "Invalid slot status - #{slot}" unless slot.attaching?

    new_runner_id = task.generate_runner_id

    # Persist the id on both records before the runner is actually launched.
    task.update!(runner_id: new_runner_id)
    slot.update!(runner_id: new_runner_id)

    launcher = slot.node.runner_service(:run_task)
    launcher.perform(task: task, slot: slot, runner_id: new_runner_id)

    task.mark_as_started!(runner_id: new_runner_id, slot: slot)
    Rails.logger.debug("#{task} marked as started")

    slot.mark_as_running(current_task: task, runner_id: new_runner_id)
    Rails.logger.debug("#{slot} marked as running")

    add_metric(task)
    task
  rescue StandardError => e
    Rails.logger.debug("Error in RunTaskJob: #{e}")

    # Only connection failures are recorded against the node.
    if e.is_a?(Node::NodeConnectionError)
      slot.node.register_error(e.message)
    end

    slot.release
    Rails.logger.debug("#{slot} released")

    task.mark_as_retry(error: e.message)
    Rails.logger.debug("#{task} marked as retry")

    add_metric(task)

    Rails.logger.debug("Performed RunTaskJob for #{task} #{slot}")
  end

  # Publishes one "tasks" metric entry describing the task's current state.
  # `task` must not be nil: its id is read unconditionally.
  def add_metric(task)
    Metrics.new("tasks").count(
      task_id: task.id,
      name: task.name,
      type: task.execution_type,
      slot: task.slot&.name,
      node: task.slot&.node&.name,
      started_at: task.started_at,
      duration: task.milliseconds_waiting,
      error: task.error,
      status: task.status
    )
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Fans out one RunTasksJob per distinct execution type found on the slots.
class RunTasksForAllExecutionTypesJob < ContainerBrokerBaseJob
  queue_as :default

  def perform
    known_execution_types = Slot.pluck(:execution_type).uniq

    known_execution_types.each do |type|
      RunTasksJob.perform_later(execution_type: type)
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Pairs pending tasks of one execution type with lockable slots and enqueues
# a RunTaskJob for each pair.
class RunTasksJob < ContainerBrokerBaseJob
  attr_reader :execution_type

  # @param execution_type the execution type whose tasks and slots are matched
  def perform(execution_type:)
    @execution_type = execution_type

    enqueue_tasks
  end

  private

  # Keeps matching while tasks are pending and a slot can be locked. If a
  # slot was locked but no task could be, the slot is flipped back to
  # available and the loop stops.
  def enqueue_tasks
    while have_pending_tasks? && (slot = lock_slot)
      task = lock_task

      unless task
        slot.available!
        break
      end

      Rails.logger.debug "Perform_later RunTaskJob for #{slot} #{task}"
      RunTaskJob.perform_later(slot: slot, task: task)
    end
  end

  # A fresh LockSlot service is built on every call.
  def lock_slot
    slot_locker = LockSlot.new(execution_type: execution_type)
    slot_locker.perform
  end

  def lock_task
    lock_task_service.perform
  end

  def have_pending_tasks?
    lock_task_service.any_pending?
  end

  # Memoized LockTask service shared by lock_task and have_pending_tasks?.
  def lock_task_service
    @lock_task_service ||= LockTask.new(execution_type: execution_type)
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Moves failed tasks whose `finished_at` is older than the configured
# threshold to the error state and appends a note to their logs.
class TimeoutFailedTasksJob < ContainerBrokerBaseJob
  queue_as :default

  # Job arguments are accepted and ignored.
  def perform(*_args)
    timeout_failed_tasks
  end

  # Marks each selected task as error, then persists the timeout note.
  def timeout_failed_tasks
    failed_tasks_to_timeout.map do |expired_task|
      Rails.logger.debug("Marking task as error due to timeout: #{expired_task.uuid}")

      expired_task.error!

      persist_logs(expired_task)
    end
  end

  # Failed tasks that finished before `Time.current` minus the setting.
  # NOTE(review): the setting is subtracted as-is; its name suggests hours,
  # so confirm it is stored as a duration or a number of seconds.
  def failed_tasks_to_timeout
    failed_scope = Task.failed
    cutoff = Time.current - Settings.timeout_tasks_after_hours

    failed_scope.where(:finished_at.lt => cutoff)
  end

  private

  # Appends the timeout note to the task's existing logs and saves the task.
  def persist_logs(task)
    # `.dup` hands set_logs an unfrozen copy regardless of string-literal freezing.
    annotated_logs = "#{task.get_logs}\nThis task was automatically marked as error due to timeout.\n".dup
    task.set_logs(annotated_logs)

    task.save!
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Runs the node's `:update_node_status` runner service under a
# LockManager lock keyed by this job class and the node id.
class UpdateNodeStatusJob < ContainerBrokerBaseJob
  queue_as :default

  # @param node the node whose status should be refreshed
  def perform(node:)
    Rails.logger.debug("Waiting for lock to update status of #{node}")

    # Built with `wait: false`; a falsy result from `lock` is treated below
    # as "another job holds the lock".
    node_lock = LockManager.new(type: self.class.to_s, id: node.id, expire: 1.minute, wait: false)

    updated = node_lock.lock do
      Rails.logger.debug("Lock acquired for update status of #{node}")

      status_updater = node.runner_service(:update_node_status)
      status_updater.perform(node: node)

      Rails.logger.debug("Releasing lock for update status of #{node}")
      true
    end

    outcome =
      if updated
        "Lock released for update status of #{node}"
      else
        "Node updating is locked by another job and will be ignored now"
      end

    Rails.logger.debug(outcome)
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Copies the final execution info of a task's runner onto the task, optionally
# stores the runner logs, and marks the task completed or for retry.
class UpdateTaskStatusJob < ContainerBrokerBaseJob
  extend RequestIdFromTask

  # Raised when the runner has not terminated yet.
  class InvalidContainerStatusError < StandardError; end

  queue_as :default

  # @param task the task whose runner should be inspected
  # @raise [InvalidContainerStatusError] when the runner is not terminated
  def perform(task)
    Rails.logger.debug("Updating status for task #{task}")
    Rails.logger.debug("Task #{task} is running in slot #{task.slot}")

    info_fetcher = task.slot.node.runner_service(:fetch_execution_info)
    info = info_fetcher.perform(task: task)

    Rails.logger.debug("Got runner #{info.id} with state #{info.status}")

    unless info.terminated?
      Rails.logger.debug("Runner should be terminated but it is #{info.status}. Execution info is #{info.to_h}")
      raise InvalidContainerStatusError, "Runner should be terminated (current status: #{info.status})"
    end

    Rails.logger.debug("Container is in status #{info.status} and exit code #{info.exit_code}")

    task.exit_code = info.exit_code
    task.started_at = info.started_at
    task.finished_at = info.finished_at

    persist_logs(task)

    if info.success?
      Rails.logger.debug("Marking task as completed and no errors")
      task.error = nil
      task.completed!
    else
      Rails.logger.debug("Marked task for retry and set error as #{info.error}")
      task.mark_as_retry(error: info.error)
    end

    task.save!

    add_metric(task)
  end

  # Fetches the runner logs and sets them on the task, but only when the
  # task's `persist_logs` flag is truthy.
  def persist_logs(task)
    return unless task.persist_logs

    Rails.logger.debug("Persisting logs for #{task}")
    log_fetcher = task.slot.node.runner_service(:fetch_logs)
    task.set_logs(log_fetcher.perform(task: task))
  end

  # Publishes one "tasks" metric entry describing the finished task.
  # `task` must not be nil: its id is read unconditionally.
  def add_metric(task)
    Metrics.new("tasks").count(
      task_id: task.id,
      event_id: task.tags&.dig("event_id"),
      api_id: task.tags&.dig("api_id").to_i,
      name: task.name,
      type: task.execution_type,
      slot: task.slot&.name,
      node: task.slot&.node&.name,
      started_at: task.started_at,
      finished_at: task.finished_at,
      duration: task.milliseconds_running,
      processing_time: task.seconds_running.to_i,
      error: task.error,
      status: task.status
    )
  end
end
|