container_broker 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +98 -0
- data/Rakefile +8 -0
- data/app/controllers/application_controller.rb +5 -0
- data/app/controllers/healthcheck_controller.rb +21 -0
- data/app/controllers/nodes_controller.rb +70 -0
- data/app/controllers/nodes_healthcheck_controller.rb +28 -0
- data/app/controllers/status_controller.rb +48 -0
- data/app/controllers/tasks_controller.rb +83 -0
- data/app/controllers/tasks_healthcheck_controller.rb +28 -0
- data/app/jobs/add_task_tags_job.rb +13 -0
- data/app/jobs/adjust_node_slots_job.rb +27 -0
- data/app/jobs/application_job.rb +9 -0
- data/app/jobs/collect_load_metrics_job.rb +9 -0
- data/app/jobs/container_broker_base_job.rb +32 -0
- data/app/jobs/migrate_tasks_from_dead_node_job.rb +32 -0
- data/app/jobs/monitor_unresponsive_node_job.rb +21 -0
- data/app/jobs/monitor_unresponsive_nodes_job.rb +9 -0
- data/app/jobs/release_slot_job.rb +47 -0
- data/app/jobs/remove_runner_job.rb +11 -0
- data/app/jobs/remove_unused_tags_job.rb +25 -0
- data/app/jobs/request_id_from_task.rb +7 -0
- data/app/jobs/run_task_job.rb +64 -0
- data/app/jobs/run_tasks_for_all_execution_types_job.rb +11 -0
- data/app/jobs/run_tasks_job.rb +42 -0
- data/app/jobs/timeout_failed_tasks_job.rb +31 -0
- data/app/jobs/update_all_nodes_status_job.rb +9 -0
- data/app/jobs/update_node_status_job.rb +24 -0
- data/app/jobs/update_task_status_job.rb +71 -0
- data/app/models/mongoid_serializable_model.rb +14 -0
- data/app/models/node.rb +101 -0
- data/app/models/slot.rb +42 -0
- data/app/models/task.rb +148 -0
- data/app/models/task_tag.rb +11 -0
- data/app/observers/observable.rb +23 -0
- data/app/observers/task_observer.rb +11 -0
- data/app/serializers/node_healthcheck_serializer.rb +5 -0
- data/app/serializers/node_serializer.rb +5 -0
- data/app/serializers/status_panel_node_serializer.rb +9 -0
- data/app/serializers/status_panel_slot_serializer.rb +5 -0
- data/app/serializers/status_panel_task_serializer.rb +16 -0
- data/app/serializers/task_healthcheck_serializer.rb +5 -0
- data/app/serializers/task_serializer.rb +7 -0
- data/app/services/adjust_execution_type_slots.rb +51 -0
- data/app/services/check_for_slot_removal.rb +28 -0
- data/app/services/collect_load_metrics.rb +40 -0
- data/app/services/delete_node.rb +25 -0
- data/app/services/friendly_name_nodes.rb +10 -0
- data/app/services/friendly_name_slots.rb +15 -0
- data/app/services/kill_node_runners.rb +17 -0
- data/app/services/kill_task_container.rb +29 -0
- data/app/services/kubernetes_client.rb +136 -0
- data/app/services/least_used_node.rb +44 -0
- data/app/services/lock_manager.rb +74 -0
- data/app/services/lock_slot.rb +37 -0
- data/app/services/lock_task.rb +45 -0
- data/app/services/metrics.rb +43 -0
- data/app/services/migrate_runner.rb +26 -0
- data/app/services/node_task_acceptance.rb +18 -0
- data/app/services/node_usage_percentage_per_execution_type.rb +22 -0
- data/app/services/reschedule_tasks_for_missing_runners.rb +70 -0
- data/app/services/runners.rb +4 -0
- data/app/services/runners/docker/create_connection.rb +18 -0
- data/app/services/runners/docker/create_execution_info.rb +87 -0
- data/app/services/runners/docker/fetch_execution_info.rb +17 -0
- data/app/services/runners/docker/fetch_logs.rb +18 -0
- data/app/services/runners/docker/fetch_task_container.rb +15 -0
- data/app/services/runners/docker/filer.rb +19 -0
- data/app/services/runners/docker/kill_slot_runner.rb +19 -0
- data/app/services/runners/docker/node_availability.rb +11 -0
- data/app/services/runners/docker/remove_runner.rb +18 -0
- data/app/services/runners/docker/run_task.rb +63 -0
- data/app/services/runners/docker/update_node_status.rb +62 -0
- data/app/services/runners/execution_info.rb +49 -0
- data/app/services/runners/invalid_config.rb +5 -0
- data/app/services/runners/invalid_runner.rb +5 -0
- data/app/services/runners/kubernetes/create_client.rb +29 -0
- data/app/services/runners/kubernetes/create_execution_info.rb +103 -0
- data/app/services/runners/kubernetes/fetch_execution_info.rb +15 -0
- data/app/services/runners/kubernetes/fetch_logs.rb +17 -0
- data/app/services/runners/kubernetes/filer.rb +41 -0
- data/app/services/runners/kubernetes/kill_slot_runner.rb +11 -0
- data/app/services/runners/kubernetes/node_availability.rb +11 -0
- data/app/services/runners/kubernetes/remove_runner.rb +19 -0
- data/app/services/runners/kubernetes/run_task.rb +54 -0
- data/app/services/runners/kubernetes/update_node_status.rb +64 -0
- data/app/services/runners/runner_id_not_found_error.rb +5 -0
- data/app/services/runners/services_factory.rb +38 -0
- data/app/services/runners/update_node_status_helper.rb +43 -0
- data/app/services/slots_usage_percentage.rb +18 -0
- data/config/application.rb +34 -0
- data/config/boot.rb +5 -0
- data/config/environment.rb +7 -0
- data/config/environments/test.rb +44 -0
- data/config/initializers/application_controller_renderer.rb +10 -0
- data/config/initializers/backtrace_silencers.rb +9 -0
- data/config/initializers/config.rb +51 -0
- data/config/initializers/cookies_serializer.rb +7 -0
- data/config/initializers/docker_config.rb +3 -0
- data/config/initializers/filter_parameter_logging.rb +6 -0
- data/config/initializers/idempotent_request.rb +12 -0
- data/config/initializers/inflections.rb +18 -0
- data/config/initializers/mime_types.rb +6 -0
- data/config/initializers/mongoid.rb +3 -0
- data/config/initializers/new_framework_defaults_6_0.rb +47 -0
- data/config/initializers/raven.rb +10 -0
- data/config/initializers/sidekiq.rb +24 -0
- data/config/initializers/wrap_parameters.rb +16 -0
- data/config/locales/en.yml +33 -0
- data/config/mongoid.yml +10 -0
- data/config/routes.rb +43 -0
- data/config/secrets.yml +35 -0
- data/config/settings.yml +34 -0
- data/config/settings/test.yml +27 -0
- data/config/sidekiq_scheduler.yml +18 -0
- data/config/spring.rb +8 -0
- data/lib/constants.rb +12 -0
- data/lib/container_broker.rb +30 -0
- data/lib/container_broker/engine.rb +6 -0
- data/lib/container_broker/version.rb +5 -0
- data/lib/current_thread_request_id.rb +19 -0
- data/lib/idempotent_request/callback.rb +25 -0
- data/lib/idempotent_request/policy.rb +15 -0
- data/lib/redis_url_parser.rb +25 -0
- data/lib/tasks/task.rake +34 -0
- metadata +590 -0
@@ -0,0 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Root job class of the application. It currently adds no behaviour of its own
# on top of ActiveJob::Base; both macros below are left commented out.
class ApplicationJob < ActiveJob::Base
  # Retrying jobs that hit a deadlock is disabled:
  # retry_on ActiveRecord::Deadlocked

  # Discarding jobs whose underlying records can no longer be deserialized is disabled:
  # discard_on ActiveJob::DeserializationError
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Common parent of the broker jobs. It registers two around_perform callbacks:
# one that reports execution metrics and one that tags log output with a
# request id when the job class can derive one from its first argument.
class ContainerBrokerBaseJob < ApplicationJob
  JOB_METRIC = "jobs"

  # Times the wrapped execution and reports it through Metrics together with
  # identifying data about the job. Nothing is reported when the wrapped
  # block raises, because the exception propagates before the metric is sent.
  around_perform do |job, block|
    elapsed = Benchmark.realtime { block.call }

    metric = Metrics.new(JOB_METRIC)
    metric.count(
      job_id: job.job_id,
      job_class: job.class.to_s,
      executions: job.executions,
      queue_name: job.queue_name,
      hostname: Socket.gethostname,
      time: elapsed
    )
  end

  # Runs the job inside a tagged logger and CurrentThreadRequestId scope when
  # the job class yields a request id; otherwise runs it untouched.
  around_perform do |job, block|
    request_id = job.class.request_id_from_args(job.arguments.first)
    next block.call unless request_id

    Rails.logger.tagged(" request_id=#{request_id} ") do
      CurrentThreadRequestId.set(request_id) { block.call }
    end
  end

  # Default hook: no request id can be derived from the arguments.
  #
  # @param _args [Object] first argument given to the job (ignored here)
  # @return [nil]
  def self.request_id_from_args(_args)
    nil
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Walks through the non-available slots of a node, marks their current tasks
# for retry when they are starting or started, migrates the slot runner and
# releases the slot. Does nothing when the node reports itself as available.
class MigrateTasksFromDeadNodeJob < ContainerBrokerBaseJob
  queue_as :default

  # @param node [Node] node whose occupied slots should be migrated
  def perform(node:)
    if node.available?
      Rails.logger.debug("Not migrating tasks because #{node} returned to available status")
      return
    end

    node.run_with_lock_no_wait do
      Rails.logger.debug("Migrating tasks from #{node}")

      occupied_slots = node.slots.reject(&:available?)
      occupied_slots.each do |busy_slot|
        Rails.logger.debug("Migrating task for #{busy_slot}")

        task = busy_slot.current_task
        if task
          Rails.logger.debug("Retrying slot current task #{task}")
          # Only tasks that are starting or started are marked for retry.
          task.mark_as_retry if task.starting? || task.started?
        else
          Rails.logger.debug("Slot does not have current task")
        end

        runner_migration = MigrateRunner.new(runner_id: busy_slot.runner_id)
        runner_migration.migrate

        Rails.logger.debug("Releasing #{busy_slot}")
        busy_slot.release
        Rails.logger.debug("#{busy_slot} released")
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Runs the node's :node_availability runner service and, when it completes
# without raising, marks the node as available and enqueues task scheduling.
# Any StandardError is registered on the node and logged instead of re-raised.
class MonitorUnresponsiveNodeJob < ContainerBrokerBaseJob
  queue_as :default

  # @param node [Node] node to probe
  def perform(node:)
    node.run_with_lock_no_wait do
      availability_service = node.runner_service(:node_availability)
      availability_service.perform(node: node)

      Rails.logger.debug("Marking #{node} as available again")
      node.register_success
      node.available!

      RunTasksForAllExecutionTypesJob.perform_later
    end
  rescue StandardError => error
    failure = "#{error.class}: #{error.message}"
    node.register_error(failure)

    Rails.logger.info("#{node} still unresponsive")
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Finishes the work attached to a slot: updates the status of its current
# task, optionally enqueues the runner removal, and then either leaves the
# slot alone (when CheckForSlotRemoval reports it removed) or releases it.
class ReleaseSlotJob < ContainerBrokerBaseJob
  # Raised when the slot holds a runner id other than the one provided.
  class InvalidSlotContainerId < StandardError; end
  queue_as :default

  # @param slot [Slot] slot to be released
  # @param runner_id [String] runner id the caller expects the slot to hold
  # @raise [InvalidSlotContainerId] when the slot holds a different runner id
  # @raise [StandardError] any other failure, after registering it on the node
  def perform(slot:, runner_id:)
    Rails.logger.debug("ReleaseSlotJob for #{slot} and container #{runner_id}")

    if MigrateRunner.new(runner_id: runner_id).migrated?
      Rails.logger.debug("Ignores release slot for #{slot} because it's migrated")
      return
    end

    check_same_runner_id(slot: slot, runner_id: runner_id)

    UpdateTaskStatusJob.perform_now(slot.current_task.reload)

    Rails.logger.debug("Enqueueing container removal")
    if Settings.delete_container_after_run
      RemoveRunnerJob.perform_later(node: MongoidSerializableModel.new(slot.node), runner_id: slot.runner_id)
    end

    removal_check = CheckForSlotRemoval.new(slot: slot)
    removal_check.perform
    return Rails.logger.debug("Slot removed and wont be released") if removal_check.removed?

    slot.release
    Rails.logger.debug("Slot released (#{slot.status})")
  rescue Runners::RunnerIdNotFoundError => not_found
    # Swallowed on purpose: only logged, not re-raised.
    Rails.logger.debug("Runner #{runner_id} not found (#{not_found.message}). Task will be rescheduled in UpdateNodeStatus.")
  rescue StandardError => error
    Rails.logger.debug("Error in ReleaseSlotJob for #{slot}: #{error}")
    slot.node.register_error(error.message)
    raise
  end

  # Ensures the slot still holds the runner the caller is talking about.
  #
  # @param slot [Slot]
  # @param runner_id [String]
  # @return [nil] when both ids match
  # @raise [InvalidSlotContainerId] when they differ
  def check_same_runner_id(slot:, runner_id:)
    unless runner_id == slot.runner_id
      message = "Current container id (#{slot.runner_id}) in #{slot} is different than the provided (#{runner_id})"

      Rails.logger.error(message)

      raise InvalidSlotContainerId, message
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Destroys every TaskTag for which no Task has a matching "tags.<name>" field.
class RemoveUnusedTagsJob < ContainerBrokerBaseJob
  queue_as :default

  # Arguments are accepted and ignored.
  def perform(*_args)
    remove_unreferenced_tags
  end

  # Loads all tags, selects the ones no task references, then destroys them.
  # Selection finishes for every tag before the first destroy! is issued.
  def remove_unreferenced_tags
    orphaned_tags = TaskTag.all.to_a.reject { |tag| any_task_referencing_tag?(tag) }
    orphaned_tags.each(&:destroy!)
  end

  # @param task_tag [TaskTag]
  # @return [Symbol] field path of the tag inside a task, e.g. :"tags.foo"
  def tag_expression(task_tag)
    "tags.#{task_tag.name}".to_sym
  end

  # @param task_tag [TaskTag]
  # @return [Boolean] result of the existence query for tasks carrying the tag field
  def any_task_referencing_tag?(task_tag)
    tag_field = tag_expression(task_tag)
    Task.where(tag_field.exists => true).exists?
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Starts a task on a slot: assigns a fresh runner id to both, asks the node's
# :run_task runner service to run it, and marks task and slot as
# started/running. On any StandardError the slot is released and the task is
# marked for retry.
class RunTaskJob < ContainerBrokerBaseJob
  extend RequestIdFromTask

  queue_as :default

  # @param task [Task] task expected to be in the starting status
  # @param slot [Slot] slot expected to be in the attaching status
  # @return [Task] the task on success; on failure the error is handled here
  #   and not re-raised
  def perform(task:, slot:)
    # TODO: remove after successful deploy
    legacy_mount = task.attributes["ingest_storage_mount"]
    task.update!(storage_mounts: { "ingest-nfs" => legacy_mount }) if legacy_mount && task.storage_mounts.blank?

    Rails.logger.debug("Performing RunTaskJob for #{task} #{slot}")

    raise "Invalid task status - #{task}" unless task.starting?
    raise "Invalid slot status - #{slot}" unless slot.attaching?

    runner_id = task.generate_runner_id

    task.update!(runner_id: runner_id)
    slot.update!(runner_id: runner_id)

    run_service = slot.node.runner_service(:run_task)
    run_service.perform(task: task, slot: slot, runner_id: runner_id)

    task.mark_as_started!(runner_id: runner_id, slot: slot)
    Rails.logger.debug("#{task} marked as started")

    slot.mark_as_running(current_task: task, runner_id: runner_id)
    Rails.logger.debug("#{slot} marked as running")

    add_metric(task)
    task
  rescue StandardError => error
    Rails.logger.debug("Error in RunTaskJob: #{error}")

    # Only connection failures are registered on the node.
    slot.node.register_error(error.message) if error.is_a?(Node::NodeConnectionError)

    slot.release
    Rails.logger.debug("#{slot} released")

    task.mark_as_retry(error: error.message)
    Rails.logger.debug("#{task} marked as retry")

    add_metric(task)

    Rails.logger.debug("Performed RunTaskJob for #{task} #{slot}")
  end

  # Reports the task's current state under the "tasks" metric.
  #
  # @param task [Task]
  def add_metric(task)
    metric = Metrics.new("tasks")
    metric.count(
      task_id: task.id,
      name: task.name,
      type: task.execution_type,
      slot: task.slot&.name,
      node: task.slot&.node&.name,
      started_at: task.started_at,
      duration: task.milliseconds_waiting,
      error: task.error,
      status: task.status
    )
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Enqueues one RunTasksJob per distinct execution type found among the slots.
class RunTasksForAllExecutionTypesJob < ContainerBrokerBaseJob
  queue_as :default

  def perform
    execution_types = Slot.pluck(:execution_type).uniq
    execution_types.each { |type| RunTasksJob.perform_later(execution_type: type) }
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Pairs pending tasks of one execution type with lockable slots and enqueues
# a RunTaskJob for each pair.
class RunTasksJob < ContainerBrokerBaseJob
  attr_reader :execution_type

  # @param execution_type [String] execution type whose tasks should be scheduled
  def perform(execution_type:)
    @execution_type = execution_type

    enqueue_tasks
  end

  private

  # Keeps going while there are pending tasks and a slot can be locked.
  # When a slot was locked but no task could be, the slot is made available
  # again and the loop stops.
  def enqueue_tasks
    while have_pending_tasks? && (free_slot = lock_slot)
      next_task = lock_task

      unless next_task
        free_slot.available!
        break
      end

      Rails.logger.debug "Perform_later RunTaskJob for #{free_slot} #{next_task}"
      RunTaskJob.perform_later(slot: free_slot, task: next_task)
    end
  end

  # @return [Slot, nil] whatever LockSlot#perform returns; a falsy value ends the loop
  def lock_slot
    slot_locker = LockSlot.new(execution_type: execution_type)
    slot_locker.perform
  end

  # @return [Task, nil] whatever LockTask#perform returns
  def lock_task
    lock_task_service.perform
  end

  def have_pending_tasks?
    lock_task_service.any_pending?
  end

  # Memoized so the same LockTask instance serves both the pending check and the locking.
  def lock_task_service
    @lock_task_service ||= LockTask.new(execution_type: execution_type)
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Moves failed tasks that finished before a configured cutoff into the error
# status and appends an explanatory line to their logs.
class TimeoutFailedTasksJob < ContainerBrokerBaseJob
  queue_as :default

  # Arguments are accepted and ignored.
  def perform(*_args)
    timeout_failed_tasks
  end

  # @return [Array] one entry per processed task (the result of persist_logs)
  def timeout_failed_tasks
    failed_tasks_to_timeout.map do |expired_task|
      Rails.logger.debug("Marking task as error due to timeout: #{expired_task.uuid}")

      expired_task.error!

      persist_logs(expired_task)
    end
  end

  # Failed tasks whose finished_at is older than the cutoff.
  #
  # NOTE(review): subtracting a plain Numeric from a Time is interpreted as
  # seconds. Confirm that Settings.timeout_tasks_after_hours is already a
  # duration and not a bare number of hours.
  def failed_tasks_to_timeout
    cutoff = Time.current - Settings.timeout_tasks_after_hours
    Task.failed.where(:finished_at.lt => cutoff)
  end

  private

  # Appends the timeout notice to the task's existing logs and saves the task.
  def persist_logs(task)
    logs_with_notice = "#{task.get_logs}\nThis task was automatically marked as error due to timeout.\n".dup
    task.set_logs(logs_with_notice)

    task.save!
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Runs the node's :update_node_status runner service under a LockManager lock
# keyed by this job class and the node id, created with wait: false.
class UpdateNodeStatusJob < ContainerBrokerBaseJob
  queue_as :default

  # @param node [Node] node whose status should be refreshed
  def perform(node:)
    Rails.logger.debug("Waiting for lock to update status of #{node}")

    node_lock = LockManager.new(type: self.class.to_s, id: node.id, expire: 1.minute, wait: false)
    updated = node_lock.lock do
      Rails.logger.debug("Lock acquired for update status of #{node}")

      node.runner_service(:update_node_status).perform(node: node)

      Rails.logger.debug("Releasing lock for update status of #{node}")
      # The block's value is what tells the two outcomes apart below.
      true
    end

    outcome =
      if updated
        "Lock released for update status of #{node}"
      else
        "Node updating is locked by another job and will be ignored now"
      end
    Rails.logger.debug(outcome)
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Reads the execution info of a task's runner, copies exit code and timestamps
# onto the task, optionally persists the runner logs, and marks the task as
# completed or for retry.
class UpdateTaskStatusJob < ContainerBrokerBaseJob
  extend RequestIdFromTask

  # Raised when the runner is not in a terminated state.
  class InvalidContainerStatusError < StandardError; end

  queue_as :default

  # @param task [Task] task whose runner is expected to have terminated
  # @raise [InvalidContainerStatusError] when the runner has not terminated
  def perform(task)
    Rails.logger.debug("Updating status for task #{task}")
    Rails.logger.debug("Task #{task} is running in slot #{task.slot}")

    info_fetcher = task.slot.node.runner_service(:fetch_execution_info)
    info = info_fetcher.perform(task: task)

    Rails.logger.debug("Got runner #{info.id} with state #{info.status}")

    unless info.terminated?
      Rails.logger.debug("Runner should be terminated but it is #{info.status}. Execution info is #{info.to_h}")
      raise InvalidContainerStatusError, "Runner should be terminated (current status: #{info.status})"
    end

    Rails.logger.debug("Container is in status #{info.status} and exit code #{info.exit_code}")

    task.exit_code = info.exit_code
    task.started_at = info.started_at
    task.finished_at = info.finished_at

    persist_logs(task)

    if info.success?
      Rails.logger.debug("Marking task as completed and no errors")
      task.error = nil
      task.completed!
    else
      Rails.logger.debug("Marked task for retry and set error as #{info.error}")
      task.mark_as_retry(error: info.error)
    end

    task.save!

    add_metric(task)
  end

  # Stores the runner logs on the task, but only when the task asks for it.
  #
  # @param task [Task]
  def persist_logs(task)
    return unless task.persist_logs

    Rails.logger.debug("Persisting logs for #{task}")
    logs_fetcher = task.slot.node.runner_service(:fetch_logs)
    task.set_logs(logs_fetcher.perform(task: task))
  end

  # Reports the task's final state under the "tasks" metric.
  #
  # @param task [Task]
  def add_metric(task)
    metric = Metrics.new("tasks")
    metric.count(
      task_id: task.id,
      event_id: task.tags&.dig("event_id"),
      api_id: task.tags&.dig("api_id").to_i,
      name: task.name,
      type: task.execution_type,
      slot: task.slot&.name,
      node: task.slot&.node&.name,
      started_at: task.started_at,
      finished_at: task.finished_at,
      duration: task.milliseconds_running,
      processing_time: task.seconds_running.to_i,
      error: task.error,
      status: task.status
    )
  end
end
|