container_broker 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +98 -0
  3. data/Rakefile +8 -0
  4. data/app/controllers/application_controller.rb +5 -0
  5. data/app/controllers/healthcheck_controller.rb +21 -0
  6. data/app/controllers/nodes_controller.rb +70 -0
  7. data/app/controllers/nodes_healthcheck_controller.rb +28 -0
  8. data/app/controllers/status_controller.rb +48 -0
  9. data/app/controllers/tasks_controller.rb +83 -0
  10. data/app/controllers/tasks_healthcheck_controller.rb +28 -0
  11. data/app/jobs/add_task_tags_job.rb +13 -0
  12. data/app/jobs/adjust_node_slots_job.rb +27 -0
  13. data/app/jobs/application_job.rb +9 -0
  14. data/app/jobs/collect_load_metrics_job.rb +9 -0
  15. data/app/jobs/container_broker_base_job.rb +32 -0
  16. data/app/jobs/migrate_tasks_from_dead_node_job.rb +32 -0
  17. data/app/jobs/monitor_unresponsive_node_job.rb +21 -0
  18. data/app/jobs/monitor_unresponsive_nodes_job.rb +9 -0
  19. data/app/jobs/release_slot_job.rb +47 -0
  20. data/app/jobs/remove_runner_job.rb +11 -0
  21. data/app/jobs/remove_unused_tags_job.rb +25 -0
  22. data/app/jobs/request_id_from_task.rb +7 -0
  23. data/app/jobs/run_task_job.rb +64 -0
  24. data/app/jobs/run_tasks_for_all_execution_types_job.rb +11 -0
  25. data/app/jobs/run_tasks_job.rb +42 -0
  26. data/app/jobs/timeout_failed_tasks_job.rb +31 -0
  27. data/app/jobs/update_all_nodes_status_job.rb +9 -0
  28. data/app/jobs/update_node_status_job.rb +24 -0
  29. data/app/jobs/update_task_status_job.rb +71 -0
  30. data/app/models/mongoid_serializable_model.rb +14 -0
  31. data/app/models/node.rb +101 -0
  32. data/app/models/slot.rb +42 -0
  33. data/app/models/task.rb +148 -0
  34. data/app/models/task_tag.rb +11 -0
  35. data/app/observers/observable.rb +23 -0
  36. data/app/observers/task_observer.rb +11 -0
  37. data/app/serializers/node_healthcheck_serializer.rb +5 -0
  38. data/app/serializers/node_serializer.rb +5 -0
  39. data/app/serializers/status_panel_node_serializer.rb +9 -0
  40. data/app/serializers/status_panel_slot_serializer.rb +5 -0
  41. data/app/serializers/status_panel_task_serializer.rb +16 -0
  42. data/app/serializers/task_healthcheck_serializer.rb +5 -0
  43. data/app/serializers/task_serializer.rb +7 -0
  44. data/app/services/adjust_execution_type_slots.rb +51 -0
  45. data/app/services/check_for_slot_removal.rb +28 -0
  46. data/app/services/collect_load_metrics.rb +40 -0
  47. data/app/services/delete_node.rb +25 -0
  48. data/app/services/friendly_name_nodes.rb +10 -0
  49. data/app/services/friendly_name_slots.rb +15 -0
  50. data/app/services/kill_node_runners.rb +17 -0
  51. data/app/services/kill_task_container.rb +29 -0
  52. data/app/services/kubernetes_client.rb +136 -0
  53. data/app/services/least_used_node.rb +44 -0
  54. data/app/services/lock_manager.rb +74 -0
  55. data/app/services/lock_slot.rb +37 -0
  56. data/app/services/lock_task.rb +45 -0
  57. data/app/services/metrics.rb +43 -0
  58. data/app/services/migrate_runner.rb +26 -0
  59. data/app/services/node_task_acceptance.rb +18 -0
  60. data/app/services/node_usage_percentage_per_execution_type.rb +22 -0
  61. data/app/services/reschedule_tasks_for_missing_runners.rb +70 -0
  62. data/app/services/runners.rb +4 -0
  63. data/app/services/runners/docker/create_connection.rb +18 -0
  64. data/app/services/runners/docker/create_execution_info.rb +87 -0
  65. data/app/services/runners/docker/fetch_execution_info.rb +17 -0
  66. data/app/services/runners/docker/fetch_logs.rb +18 -0
  67. data/app/services/runners/docker/fetch_task_container.rb +15 -0
  68. data/app/services/runners/docker/filer.rb +19 -0
  69. data/app/services/runners/docker/kill_slot_runner.rb +19 -0
  70. data/app/services/runners/docker/node_availability.rb +11 -0
  71. data/app/services/runners/docker/remove_runner.rb +18 -0
  72. data/app/services/runners/docker/run_task.rb +63 -0
  73. data/app/services/runners/docker/update_node_status.rb +62 -0
  74. data/app/services/runners/execution_info.rb +49 -0
  75. data/app/services/runners/invalid_config.rb +5 -0
  76. data/app/services/runners/invalid_runner.rb +5 -0
  77. data/app/services/runners/kubernetes/create_client.rb +29 -0
  78. data/app/services/runners/kubernetes/create_execution_info.rb +103 -0
  79. data/app/services/runners/kubernetes/fetch_execution_info.rb +15 -0
  80. data/app/services/runners/kubernetes/fetch_logs.rb +17 -0
  81. data/app/services/runners/kubernetes/filer.rb +41 -0
  82. data/app/services/runners/kubernetes/kill_slot_runner.rb +11 -0
  83. data/app/services/runners/kubernetes/node_availability.rb +11 -0
  84. data/app/services/runners/kubernetes/remove_runner.rb +19 -0
  85. data/app/services/runners/kubernetes/run_task.rb +54 -0
  86. data/app/services/runners/kubernetes/update_node_status.rb +64 -0
  87. data/app/services/runners/runner_id_not_found_error.rb +5 -0
  88. data/app/services/runners/services_factory.rb +38 -0
  89. data/app/services/runners/update_node_status_helper.rb +43 -0
  90. data/app/services/slots_usage_percentage.rb +18 -0
  91. data/config/application.rb +34 -0
  92. data/config/boot.rb +5 -0
  93. data/config/environment.rb +7 -0
  94. data/config/environments/test.rb +44 -0
  95. data/config/initializers/application_controller_renderer.rb +10 -0
  96. data/config/initializers/backtrace_silencers.rb +9 -0
  97. data/config/initializers/config.rb +51 -0
  98. data/config/initializers/cookies_serializer.rb +7 -0
  99. data/config/initializers/docker_config.rb +3 -0
  100. data/config/initializers/filter_parameter_logging.rb +6 -0
  101. data/config/initializers/idempotent_request.rb +12 -0
  102. data/config/initializers/inflections.rb +18 -0
  103. data/config/initializers/mime_types.rb +6 -0
  104. data/config/initializers/mongoid.rb +3 -0
  105. data/config/initializers/new_framework_defaults_6_0.rb +47 -0
  106. data/config/initializers/raven.rb +10 -0
  107. data/config/initializers/sidekiq.rb +24 -0
  108. data/config/initializers/wrap_parameters.rb +16 -0
  109. data/config/locales/en.yml +33 -0
  110. data/config/mongoid.yml +10 -0
  111. data/config/routes.rb +43 -0
  112. data/config/secrets.yml +35 -0
  113. data/config/settings.yml +34 -0
  114. data/config/settings/test.yml +27 -0
  115. data/config/sidekiq_scheduler.yml +18 -0
  116. data/config/spring.rb +8 -0
  117. data/lib/constants.rb +12 -0
  118. data/lib/container_broker.rb +30 -0
  119. data/lib/container_broker/engine.rb +6 -0
  120. data/lib/container_broker/version.rb +5 -0
  121. data/lib/current_thread_request_id.rb +19 -0
  122. data/lib/idempotent_request/callback.rb +25 -0
  123. data/lib/idempotent_request/policy.rb +15 -0
  124. data/lib/redis_url_parser.rb +25 -0
  125. data/lib/tasks/task.rake +34 -0
  126. metadata +590 -0
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Runners
  module Kubernetes
    # Availability probe for a Kubernetes-backed node.
    class NodeAvailability
      # Builds a client for +node+ and calls +api_info+ on it.
      #
      # @param node the node whose client should be queried
      # @return the value returned by the client's +api_info+ call
      def perform(node:)
        client = CreateClient.new.perform(node: node)
        client.api_info
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Runners
  module Kubernetes
    # Force-deletes the pod that backs a runner.
    class RemoveRunner
      # @param node      node used to build the client; connection errors are
      #                  recorded on it via +register_error+
      # @param runner_id name of the pod to delete
      def perform(node:, runner_id:)
        Rails.logger.debug("Deleting pod")

        client = CreateClient.new.perform(node: node)
        client.force_delete_pod(pod_name: runner_id)
        Rails.logger.debug("Pod #{runner_id} removed")
      rescue KubernetesClient::PodNotFoundError
        # The pod is already gone, so there is nothing left to do.
        Rails.logger.debug("Pod #{runner_id} already removed")
      rescue KubernetesClient::NetworkError => error
        node.register_error(error.message)
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module Runners
  module Kubernetes
    # Starts a task as a pod through the client built for the slot's node.
    class RunTask
      # NOTE(review): not referenced anywhere inside this class.
      NFS_NAME = "nfs"

      # Creates the pod that runs +task+.
      #
      # @param task      supplies image, cmd and storage mounts for the pod
      # @param slot      slot whose node hosts the pod
      # @param runner_id name given to the pod
      # @return the value returned by the client's +create_pod+ call
      # @raise [Node::NodeConnectionError] when the client raises
      #   KubernetesClient::NetworkError
      def perform(task:, slot:, runner_id:)
        create_pod(task: task, node: slot.node, runner_id: runner_id)
      rescue KubernetesClient::NetworkError => e
        raise Node::NodeConnectionError, "#{e.class}: #{e.message}"
      end

      private

      def create_pod(task:, node:, runner_id:)
        client = CreateClient.new.perform(node: node)
        # Resolve the mounts once: the same result carries both the internal
        # and the external entries.
        mounts = filer(task: task)

        client.create_pod(
          pod_name: runner_id,
          image: task.image,
          cmd: task.cmd,
          internal_mounts: mounts[:internal],
          external_mounts: mounts[:external],
          node_selector: node.runner_config["node_selector"]
        )
      end

      # Maps the task's storage mounts through Filer; the result is indexed
      # with :internal and :external.
      def filer(task:)
        Filer.new.perform(task_storage_mounts: task.storage_mounts)
      end

      # NOTE(review): not called from within this class.
      def add_metric(task)
        slot = task.slot

        Metrics.new("tasks").count(
          task_id: task.id,
          name: task.name,
          type: task.execution_type,
          slot: slot&.name,
          node: slot&.node&.name,
          started_at: task.started_at,
          duration: task.milliseconds_waiting,
          error: task.error,
          status: task.status
        )
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module Runners
  module Kubernetes
    # Walks the pods reported for a node and reconciles them with the node's
    # slots and started tasks.
    class UpdateNodeStatus
      include UpdateNodeStatusHelper

      attr_reader :node

      # @param node node to refresh; receives +register_success+ on completion
      #   or +register_error+ when the client raises a NetworkError
      def perform(node:)
        @node = node

        # Tasks may start while this runs, so the started tasks are loaded
        # first and the pods only afterwards.
        slot_ids = node.slots.pluck(:id)
        started_tasks = Task.started.where(:slot.in => slot_ids).to_a

        node.update!(runner_capacity_reached: pending_schedule_pods?)

        execution_infos.each { |info| handle_execution_info(info) }

        rescheduler = RescheduleTasksForMissingRunners.new(runner_ids: pods.keys, started_tasks: started_tasks)
        rescheduler.perform

        node.register_success

        send_metrics(node: node, execution_infos: execution_infos)
      rescue KubernetesClient::NetworkError => e
        Rails.logger.debug("Error #{e.class}: #{e}")
        node.register_error(e.message)
      end

      private

      # Handles one pod: releases its slot when terminated, records its error
      # otherwise, or schedules removal when no slot owns the pod.
      def handle_execution_info(execution_info)
        runner_id = execution_info.id
        slot = node.slots.find_by(runner_id: runner_id)

        return remove_unknown_runner(node: node, runner_id: runner_id) unless slot

        if execution_info.terminated?
          Rails.logger.debug("Pod #{runner_id} Complete")
          check_slot_release(slot: slot, runner_id: runner_id)
        else
          slot.current_task&.update!(error: execution_info.error) if execution_info.error
          Rails.logger.debug("Pod is not terminated (it is #{execution_info.status}). Ignoring.")
        end
      end

      # Pods fetched through the node's client, memoized for this instance.
      def pods
        @pods ||= begin
          client = CreateClient.new.perform(node: node)
          client.fetch_pods.tap { |fetched| Rails.logger.debug("Fetched #{fetched.count} pods") }
        end
      end

      # One execution info per fetched pod, memoized for this instance.
      def execution_infos
        @execution_infos ||= pods.values.map do |pod|
          CreateExecutionInfo.new.perform(pod: pod)
        end
      end

      def pending_schedule_pods?
        execution_infos.any? { |info| info.schedule_pending? }
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Runners
  # Error class for a runner id that cannot be found. It adds nothing to
  # StandardError; it is not raised within this definition.
  class RunnerIdNotFoundError < StandardError
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module Runners
  # Looks up and instantiates the service class registered for a given
  # runner provider and service name.
  class ServicesFactory
    # Raised by +fabricate+ when SERVICES has no entry for the requested pair.
    class ServiceNotFoundForRunner < StandardError; end

    # Registry: runner provider => service name => implementing class.
    SERVICES = {
      kubernetes: {
        update_node_status: Runners::Kubernetes::UpdateNodeStatus,
        node_availability: Runners::Kubernetes::NodeAvailability,
        run_task: Runners::Kubernetes::RunTask,
        kill_slot_runner: Runners::Kubernetes::KillSlotRunner,
        remove_runner: Runners::Kubernetes::RemoveRunner,
        fetch_logs: Runners::Kubernetes::FetchLogs,
        fetch_execution_info: Runners::Kubernetes::FetchExecutionInfo,
        filer: Runners::Kubernetes::Filer
      },
      docker: {
        update_node_status: Runners::Docker::UpdateNodeStatus,
        node_availability: Runners::Docker::NodeAvailability,
        run_task: Runners::Docker::RunTask,
        kill_slot_runner: Runners::Docker::KillSlotRunner,
        remove_runner: Runners::Docker::RemoveRunner,
        fetch_logs: Runners::Docker::FetchLogs,
        fetch_execution_info: Runners::Docker::FetchExecutionInfo,
        filer: Runners::Docker::Filer
      }
    }.freeze

    class << self
      # @param runner  provider name; converted with +to_sym+ before lookup
      # @param service key of the wanted service, looked up as given
      # @return a new instance of the registered service class
      # @raise [ServiceNotFoundForRunner] when the lookup yields nothing
      def fabricate(runner:, service:)
        service_class = SERVICES.dig(runner.to_sym, service)
        return service_class.new if service_class

        raise ServiceNotFoundForRunner, "No service #{service} found for #{runner}"
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module Runners
  # Helper methods mixed into the UpdateNodeStatus services.
  module UpdateNodeStatusHelper
    # Marks a running slot as releasing and enqueues ReleaseSlotJob for it;
    # a slot in any other state is only logged.
    def check_slot_release(slot:, runner_id:)
      return Rails.logger.debug("Slot was not running (it was #{slot.status}). Ignoring.") unless slot.running?

      slot.releasing!
      Rails.logger.debug("Slot was running. Marked as releasing. Slot: #{slot}. Current task: #{slot.current_task}")
      ReleaseSlotJob.perform_later(slot: MongoidSerializableModel.new(slot), runner_id: runner_id)
    end

    # Enqueues removal of a runner that no slot owns, unless its id starts
    # with one of the prefixes in Settings.ignore_containers.
    def remove_unknown_runner(node:, runner_id:)
      Rails.logger.debug("Slot not found for container #{runner_id}")

      ignored = Settings.ignore_containers.any? { |prefix| runner_id.start_with?(prefix) }

      if ignored
        Rails.logger.debug("Container #{runner_id} is ignored for removal")
      else
        # Any one of the container's names is enough to select it.
        RemoveRunnerJob.perform_later(node: node, runner_id: runner_id)
      end
    end

    # Sends one "runners" metric with node details, totals and a
    # "<status>_runners" count for each status present.
    def send_metrics(node:, execution_infos:)
      by_status = execution_infos.group_by { |info| info.status }
      runners_count = by_status.map { |status, infos| [:"#{status}_runners", infos.count] }.to_h

      data = {
        hostname: node.hostname,
        runner_type: node.runner_provider,
        capacity_reached: node.runner_capacity_reached,
        schedule_pending: execution_infos.count { |info| info.schedule_pending? },
        total_runners: execution_infos.count
      }

      Metrics.new("runners").count(data.merge(runners_count))
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
# Computes which share of a slot collection is in use, where "in use" means
# the slot does not answer true to +available?+.
class SlotsUsagePercentage
  # @param slots collection responding to +size+ and +select+, whose items
  #   respond to +available?+
  def initialize(slots)
    @slots = slots
  end

  # @return [Float] used percentage rounded to two decimals; 0.0 when the
  #   collection is empty (the division would otherwise produce NaN)
  def perform
    total = @slots.size
    return 0.0 if total.zero?

    used = total - available_slots.size
    (used.to_f / total * 100).round(2)
  end

  private

  def available_slots
    @slots.select(&:available?)
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "boot"
4
+
5
+ require "action_controller/railtie"
6
+ # require "action_view/railtie"
7
+ # require "action_mailer/railtie"
8
+ require "active_job/railtie"
9
+ # require "action_cable/engine"
10
+ # require "rails/test_unit/railtie"
11
+ # require "sprockets/railtie"
12
+
13
+ # Require the gems listed in Gemfile, including any gems
14
+ # you've limited to :test, :development, or :production.
15
+ Bundler.require(*Rails.groups)
16
+
17
module ContainerBroker
  # Rails application definition for the broker, configured as API-only.
  class Application < Rails::Application
    # Start from the framework defaults of the Rails version this app was
    # originally generated with.
    config.load_defaults 5.1

    config.api_only = true

    # Values in config/environments/* win over anything set here. Further
    # application configuration belongs in config/initializers; every .rb
    # file in that directory is loaded automatically.

    config.eager_load_paths << Rails.root.join("lib")

    # Prefix every log line with the id of the request being served.
    request_id_tag = lambda { |request| " request_id=#{request.request_id} " }
    config.log_tags = [request_id_tag]
  end
end
data/config/boot.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __dir__)
4
+
5
+ require "bundler/setup" # Set up gems listed in the Gemfile.
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Load the Rails application.
4
+ require_relative "application"
5
+
6
+ # Initialize the Rails application.
7
+ Rails.application.initialize!
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
Rails.application.configure do
  # Anything set in this block overrides config/application.rb.

  # This environment only exists to run the test suite. The test database is
  # scratch space that is wiped and recreated between runs, so never rely on
  # data stored there.
  config.cache_classes = true

  # Skip eager loading so a single test does not load the whole application.
  # Tools that preload Rails for tests may need this set to true.
  config.eager_load = false

  # Serve public files during tests, with a Cache-Control header for speed.
  cache_max_age = 1.hour.seconds.to_i
  config.public_file_server.enabled = true
  config.public_file_server.headers = { "Cache-Control" => "public, max-age=#{cache_max_age}" }

  # Full error reports, no caching.
  config.consider_all_requests_local = true
  config.action_controller.perform_caching = false

  # Let exceptions propagate instead of rendering exception templates.
  config.action_dispatch.show_exceptions = false

  # No request forgery protection while testing.
  config.action_controller.allow_forgery_protection = false
  # config.action_mailer.perform_caching = false

  # Keep Action Mailer from delivering real emails: the :test delivery
  # method collects them in ActionMailer::Base.deliveries.
  # config.action_mailer.delivery_method = :test

  # Deprecation notices go to stderr.
  config.active_support.deprecation = :stderr

  # Raise an error on missing translations.
  # config.action_view.raise_on_missing_translations = true
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Be sure to restart your server when you modify this file.
4
+
5
+ # ActiveSupport::Reloader.to_prepare do
6
+ # ApplicationController.renderer.defaults.merge!(
7
+ # http_host: 'example.org',
8
+ # https: false
9
+ # )
10
+ # end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Be sure to restart your server when you modify this file.
4
+
5
+ # You can add backtrace silencers for libraries that you're using but don't wish to see in your backtraces.
6
+ # Rails.backtrace_cleaner.add_silencer { |line| line =~ /my_noisy_library/ }
7
+
8
+ # You can also remove all the silencers if you're trying to debug a problem that might stem from framework code.
9
+ # Rails.backtrace_cleaner.remove_silencers!
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
Config.setup do |config|
  # Constant under which the loaded settings are exposed.
  config.const_name = "Settings"

  # Prefix that removes array elements set by an earlier loaded settings
  # file, for example '--'.
  #
  # config.knockout_prefix = nil

  # Whether a `nil` value overwrites an existing value on merge. With
  # `false`, the existing value survives the merge.
  #
  # config.merge_nil_values = true

  # Whether arrays from previously loaded settings files are overwritten.
  # With `false`, arrays are merged instead.
  #
  # config.overwrite_arrays = true

  # Read environment variables from `ENV` and let them override settings
  # defined in files.
  #
  # config.use_env = false

  # Prefix that decides which ENV variables are loaded into the config.
  #
  # config.env_prefix = 'Settings'

  # Level separator for settings loaded from ENV variables. The default '.'
  # works well with Heroku; something like '__' makes it easy to override
  # settings from a command line where dots in variable names may not be
  # allowed (e.g. Bash).
  #
  # config.env_separator = '.'

  # How variable names are processed:
  # * nil - no change
  # * :downcase - convert to lower case
  #
  # config.env_converter = :downcase

  # Parse numeric values as integers instead of strings.
  #
  # config.env_parse_values = true

  # Validate presence and type of specific config values. See
  # https://github.com/dry-rb/dry-validation for details.
  #
  # config.schema do
  #   required(:name).filled
  #   required(:age).maybe(:int?)
  #   required(:email).filled(format?: EMAIL_REGEX)
  # end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Be sure to restart your server when you modify this file.
4
+
5
+ # Specify a serializer for the signed and encrypted cookie jars.
6
+ # Valid options are :json, :marshal, and :hybrid.
7
+ Rails.application.config.action_dispatch.cookies_serializer = :json