connectors_service 8.6.0.4.pre.20221114T233727Z → 8.6.0.4.pre.20221116T024501Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/connectors.yml +4 -4
- data/lib/app/app.rb +4 -0
- data/lib/app/dispatcher.rb +30 -17
- data/lib/connectors/base/advanced_snippet_against_schema_validator.rb +173 -0
- data/lib/connectors/base/advanced_snippet_validator.rb +34 -0
- data/lib/connectors/base/connector.rb +27 -5
- data/lib/connectors/example/connector.rb +3 -12
- data/lib/connectors/example/example_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/gitlab/connector.rb +3 -12
- data/lib/connectors/gitlab/gitlab_advanced_snippet_validator.rb +35 -0
- data/lib/connectors/mongodb/connector.rb +9 -24
- data/lib/connectors/mongodb/mongo_advanced_snippet_against_schema_validator.rb +22 -0
- data/lib/connectors/mongodb/mongo_advanced_snippet_schema.rb +292 -0
- data/lib/connectors/sync_status.rb +6 -1
- data/lib/connectors/tolerable_error_helper.rb +43 -0
- data/lib/core/connector_job.rb +96 -23
- data/lib/core/connector_settings.rb +29 -6
- data/lib/core/elastic_connector_actions.rb +77 -55
- data/lib/core/filtering/validation_job_runner.rb +1 -1
- data/lib/core/ingestion/es_sink.rb +68 -9
- data/lib/core/ingestion.rb +0 -1
- data/lib/core/jobs/consumer.rb +114 -0
- data/lib/core/jobs/producer.rb +26 -0
- data/lib/core/single_scheduler.rb +1 -1
- data/lib/core/sync_job_runner.rb +20 -12
- data/lib/core.rb +2 -0
- data/lib/utility/error_monitor.rb +108 -0
- data/lib/utility/errors.rb +0 -12
- data/lib/utility/logger.rb +0 -1
- data/lib/utility.rb +6 -0
- metadata +12 -3
- data/lib/core/ingestion/ingester.rb +0 -90
data/lib/core/elastic_connector_actions.rb CHANGED
@@ -132,11 +132,35 @@ module Core
         update_connector_fields(connector_id, { :filtering => filtering })
       end
 
-      def
+      def update_connector_sync_now(connector_id, sync_now)
+        doc = connector_with_concurrency_control(connector_id)
+
+        body = { sync_now: sync_now, last_synced: Time.now }
+
+        update_connector_fields(
+          connector_id,
+          body,
+          doc[:seq_no],
+          doc[:primary_term]
+        )
+      end
+
+      def update_connector_last_sync_status(connector_id, last_sync_status)
+        doc = connector_with_concurrency_control(connector_id)
+
+        update_connector_fields(
+          connector_id,
+          { last_sync_status: last_sync_status },
+          doc[:seq_no],
+          doc[:primary_term]
+        )
+      end
+
+      def connector_with_concurrency_control(connector_id)
         seq_no = nil
         primary_term = nil
-
-
+
+        doc = client.get(
           :index => Utility::Constants::CONNECTORS_INDEX,
           :id => connector_id,
           :ignore => 404,
@@ -144,42 +168,31 @@ module Core
         ).tap do |response|
           seq_no = response['_seq_no']
           primary_term = response['_primary_term']
-          sync_in_progress = response.dig('_source', 'last_sync_status') == Connectors::SyncStatus::IN_PROGRESS
-        end
-        if sync_in_progress
-          raise JobAlreadyRunningError.new(connector_id)
         end
-        update_connector_fields(
-          connector_id,
-          { :sync_now => false,
-            :last_sync_status => Connectors::SyncStatus::IN_PROGRESS,
-            :last_synced => Time.now },
-          seq_no,
-          primary_term
-        )
 
+        { doc: doc, seq_no: seq_no, primary_term: primary_term }
+      end
+
+      def create_job(connector_settings:)
         body = {
-          :
-          :
-          :
-          :
-
-
-          :
-          :
+          status: Connectors::SyncStatus::PENDING,
+          created_at: Time.now,
+          last_seen: Time.now,
+          connector: {
+            id: connector_settings.id,
+            filtering: convert_connector_filtering_to_job_filtering(connector_settings.filtering),
+            index_name: connector_settings.index_name,
+            language: connector_settings[:language],
+            pipeline: connector_settings[:pipeline],
+            service_type: connector_settings.service_type
           }
         }
 
-        index_response = client.index(:
-
-
-
-
-          :id => index_response['_id'],
-          :ignore => 404
-        ).with_indifferent_access
-        end
-        raise JobNotCreatedError.new(connector_id, index_response)
+        index_response = client.index(index: Utility::Constants::JOB_INDEX, body: body, refresh: true)
+
+        return index_response if index_response['result'] == 'created'
+
+        raise JobNotCreatedError.new(connector_settings.id, index_response)
       end
 
       def convert_connector_filtering_to_job_filtering(connector_filtering)
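For reference, the job document that the new `create_job` writes looks roughly like the sketch below. All values are illustrative placeholders, not taken from the gem: the real status comes from `Connectors::SyncStatus::PENDING`, the connector fields from the passed-in `ConnectorSettings`, and the target index from `Utility::Constants::JOB_INDEX`.

```ruby
# Illustrative sketch of the indexed job document; values are placeholders.
{
  status: 'pending',        # Connectors::SyncStatus::PENDING
  created_at: Time.now,
  last_seen: Time.now,
  connector: {
    id: 'connector-id-1',
    filtering: [],          # via convert_connector_filtering_to_job_filtering
    index_name: 'search-my-index',
    language: 'en',
    pipeline: { 'name' => 'ent-search-generic-ingestion' },
    service_type: 'example'
  }
}
```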
@@ -507,31 +520,15 @@ module Core
       end
 
       def update_connector_fields(connector_id, doc = {}, seq_no = nil, primary_term = nil)
-
-
-
-
-
-          :refresh => true,
-          :retry_on_conflict => 3
-        }
-        # seq_no and primary_term are used for optimistic concurrency control
-        # see https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
-        if seq_no && primary_term
-          update_args[:if_seq_no] = seq_no
-          update_args[:if_primary_term] = primary_term
-          update_args.delete(:retry_on_conflict)
-        end
-        begin
-          client.update(update_args)
-        rescue Elastic::Transport::Transport::Errors::Conflict
-          # VersionConflictException
-          # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
-          raise ConnectorVersionChangedError.new(connector_id, seq_no, primary_term)
-        end
+        update_doc_fields(Utility::Constants::CONNECTORS_INDEX, connector_id, doc, seq_no, primary_term)
+      end
+
+      def update_job_fields(job_id, doc = {}, seq_no = nil, primary_term = nil)
+        update_doc_fields(Utility::Constants::JOB_INDEX, job_id, doc, seq_no, primary_term)
       end
 
       def document_count(index_name)
+        client.indices.refresh(:index => index_name)
         client.count(:index => index_name)['count']
       end
 
@@ -563,6 +560,31 @@ module Core
           filter.deep_merge!(new_validation_state)
         end
       end
+
+      def update_doc_fields(index, id, doc = {}, seq_no = nil, primary_term = nil)
+        return if doc.empty?
+        update_args = {
+          :index => index,
+          :id => id,
+          :body => { :doc => doc },
+          :refresh => true,
+          :retry_on_conflict => 3
+        }
+
+        if seq_no && primary_term
+          update_args[:if_seq_no] = seq_no
+          update_args[:if_primary_term] = primary_term
+          update_args.delete(:retry_on_conflict)
+        end
+
+        begin
+          client.update(update_args)
+        rescue Elastic::Transport::Transport::Errors::Conflict
+          # VersionConflictException
+          # see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#optimistic-concurrency-control-index
+          raise ConnectorVersionChangedError.new(id, seq_no, primary_term)
+        end
+      end
     end
   end
 end
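The extracted `update_doc_fields` helper is the standard Elasticsearch optimistic-concurrency pattern: read `_seq_no`/`_primary_term` along with the document, then make the update conditional on both. A minimal standalone sketch of the same idea with the `elasticsearch` Ruby client (URL, index name, and document id are placeholders):

```ruby
require 'elasticsearch'

client = Elasticsearch::Client.new(url: 'http://localhost:9200')

# Read the document together with its concurrency-control metadata.
doc = client.get(index: '.elastic-connectors', id: 'connector-id-1')

begin
  client.update(
    index: '.elastic-connectors',
    id: 'connector-id-1',
    body: { doc: { last_sync_status: 'in_progress' } },
    # The update succeeds only if no other writer touched the document
    # since our read; otherwise Elasticsearch answers 409 Conflict.
    if_seq_no: doc['_seq_no'],
    if_primary_term: doc['_primary_term']
  )
rescue Elastic::Transport::Transport::Errors::Conflict
  # Another process won the race - the analogue of ConnectorVersionChangedError above.
end
```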
data/lib/core/filtering/validation_job_runner.rb CHANGED
@@ -24,7 +24,7 @@ module Core
     def execute
       Utility::Logger.info("Starting a validation job for connector #{@connector_settings.id}.")
 
-      validation_result = @connector_class.validate_filtering(@connector_settings.filtering)
+      validation_result = @connector_class.validate_filtering(@connector_settings.filtering[:draft])
 
       # currently only used for connectors -> DEFAULT domain can be assumed (will be changed with the integration of crawler)
       ElasticConnectorActions.update_filtering_validation(@connector_settings.id, { DEFAULT_DOMAIN => validation_result })
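The validation runner now validates only the draft filter instead of the whole filtering object. A sketch of the shape this call site implies (field names inferred from the connectors filtering protocol, not quoted from the gem):

```ruby
# Hypothetical filtering object: validation now targets only :draft.
filtering = {
  :active => { :advanced_snippet => { :value => {} }, :rules => [] },
  :draft  => { :advanced_snippet => { :value => { 'find' => {} } }, :rules => [] }
}

validation_result = connector_class.validate_filtering(filtering[:draft])
```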
data/lib/core/ingestion/es_sink.rb CHANGED
@@ -11,19 +11,54 @@ require 'utility/bulk_queue'
 require 'utility/es_client'
 require 'utility/logger'
 require 'elasticsearch/api'
-
+#
+# This class is responsible for sending the data to the data storage.
+# While we don't actually allow to output our data anywhere except
+# Elasticsearch, we still want to be able to do so sometime in future.
+#
+# This class should stay simple and any change to the class should be careful
+# with the thought of introducing other sinks in future.
 module Core
   module Ingestion
     class EsSink
-      def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new)
+      def initialize(index_name, request_pipeline, bulk_queue = Utility::BulkQueue.new, max_allowed_document_size = 5 * 1024 * 1024)
         @client = Utility::EsClient.new(App::Config[:elasticsearch])
         @index_name = index_name
         @request_pipeline = request_pipeline
         @operation_queue = bulk_queue
+
+        @max_allowed_document_size = max_allowed_document_size
+
+        @queued = {
+          :indexed_document_count => 0,
+          :deleted_document_count => 0,
+          :indexed_document_volume => 0
+        }
+
+        @completed = {
+          :indexed_document_count => 0,
+          :deleted_document_count => 0,
+          :indexed_document_volume => 0
+        }
       end
 
-      def ingest(
-
+      def ingest(document)
+        if document.nil? || document.empty?
+          Utility::Logger.warn('Connector attempted to ingest an empty document, skipping')
+          return
+        end
+
+        id = document['id']
+        serialized_document = serialize(document)
+
+        document_size = serialized_document.bytesize
+
+        if @max_allowed_document_size > 0 && document_size > @max_allowed_document_size
+          Utility::Logger.warn("Connector attempted to ingest too large document with id=#{document['id']} [#{document_size}/#{@max_allowed_document_size}], skipping the document.")
+          return
+        end
+
+        index_op = serialize({ 'index' => { '_index' => @index_name, '_id' => id } })
 
         flush unless @operation_queue.will_fit?(index_op, serialized_document)
 
@@ -31,13 +66,27 @@ module Core
           index_op,
           serialized_document
         )
+
+        @queued[:indexed_document_count] += 1
+        @queued[:indexed_document_volume] += document_size
+      end
+
+      def ingest_multiple(documents)
+        documents.each { |doc| ingest(doc) }
       end
 
-      def delete(
-
+      def delete(id)
+        return if id.nil?
+
+        delete_op = serialize({ 'delete' => { '_index' => @index_name, '_id' => id } })
         flush unless @operation_queue.will_fit?(delete_op)
 
         @operation_queue.add(delete_op)
+        @queued[:deleted_document_count] += 1
+      end
+
+      def delete_multiple(ids)
+        ids.each { |id| delete(id) }
       end
 
       def flush
@@ -45,15 +94,25 @@ module Core
         return if data.empty?
 
         @client.bulk(:body => data, :pipeline => @request_pipeline)
+
+        @completed[:indexed_document_count] += @queued[:indexed_document_count]
+        @completed[:deleted_document_count] += @queued[:deleted_document_count]
+        @completed[:indexed_document_volume] += @queued[:indexed_document_volume]
+
+        @queued[:indexed_document_count] = 0
+        @queued[:deleted_document_count] = 0
+        @queued[:indexed_document_volume] = 0
       end
 
-      def
-
+      def ingestion_stats
+        @completed.dup
       end
 
       private
 
-
+      def serialize(document)
+        Elasticsearch::API.serializer.dump(document)
+      end
     end
   end
 end
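Taken together, the sink now tracks per-flush (`@queued`) and lifetime (`@completed`) counters. A hedged usage sketch, assuming a configured connector service process (index and pipeline names are placeholders):

```ruby
sink = Core::Ingestion::EsSink.new('search-my-index', 'ent-search-generic-ingestion')

sink.ingest('id' => '1', 'title' => 'Hello')  # buffered; counted in @queued
sink.delete('stale-id')                       # buffered delete operation
sink.flush                                    # one _bulk request; @queued rolls into @completed

sink.ingestion_stats
# => { :indexed_document_count => 1, :deleted_document_count => 1, :indexed_document_volume => ... }
```

Note that `ingestion_stats` reports only completed (flushed) work, so progress reported mid-sync never counts documents still sitting in the bulk queue.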
data/lib/core/jobs/consumer.rb ADDED
@@ -0,0 +1,114 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+module Core
+  module Jobs
+    class Consumer
+      def initialize(scheduler:, poll_interval: 3, termination_timeout: 60, min_threads: 1, max_threads: 5, max_queue: 100, idle_time: 5)
+        @scheduler = scheduler
+        @poll_interval = poll_interval
+        @termination_timeout = termination_timeout
+        @min_threads = min_threads
+        @max_threads = max_threads
+        @max_queue = max_queue
+        @idle_time = idle_time
+
+        @running = Concurrent::AtomicBoolean.new(false)
+      end
+
+      def subscribe!(index_name:)
+        @index_name = index_name
+
+        start_loop!
+      end
+
+      def running?
+        # @TODO check if a loop thread is alive
+        pool.running? && @running.true?
+      end
+
+      def shutdown!
+        Utility::Logger.info("Shutting down consumer for #{@index_name} index")
+        @running.make_false
+        pool.shutdown
+        pool.wait_for_termination(@termination_timeout)
+        # reset pool
+        @pool = nil
+      end
+
+      private
+
+      def start_loop!
+        Utility::Logger.info("Starting a new consumer for #{@index_name} index")
+
+        Thread.new do
+          # assign a name to the thread
+          # see @TODO in #self.running?
+          Thread.current[:name] = "consumer-group-#{@index_name}"
+
+          loop do
+            if @running.false?
+              Utility::Logger.info('Shutting down the loop')
+              break
+            end
+
+            sleep(@poll_interval)
+            Utility::Logger.debug('Getting registered connectors')
+
+            connectors = ready_for_sync_connectors
+            next unless connectors.any?
+
+            Utility::Logger.debug("Number of available connectors: #{connectors.size}")
+
+            # @TODO It is assumed that @index_name is used to retrive pending jobs.
+            # This will be discussed after 8.6 release
+            pending_jobs = Core::ConnectorJob.pending_jobs(connectors_ids: connectors.keys)
+            Utility::Logger.info("Number of pending jobs: #{pending_jobs.size}")
+
+            pending_jobs.each do |job|
+              connector_settings = connectors[job.connector_id]
+
+              pool.post do
+                Utility::Logger.info("Connector #{connector_settings.formatted} picked up the job #{job.id}")
+                Core::ElasticConnectorActions.ensure_content_index_exists(connector_settings.index_name)
+                job_runner = Core::SyncJobRunner.new(connector_settings, job)
+                job_runner.execute
+              rescue Core::JobAlreadyRunningError
+                Utility::Logger.info("Sync job for #{connector_settings.formatted} is already running, skipping.")
+              rescue Core::ConnectorVersionChangedError => e
+                Utility::Logger.info("Could not start the job because #{connector_settings.formatted} has been updated externally. Message: #{e.message}")
+              rescue StandardError => e
+                Utility::ExceptionTracking.log_exception(e, "Sync job for #{connector_settings.formatted} failed due to unexpected error.")
+              end
+            end
+          rescue StandardError => e
+            Utility::ExceptionTracking.log_exception(e, 'The consumer group failed')
+          end
+        end
+
+        @running.make_true
+      end
+
+      def pool
+        @pool ||= Concurrent::ThreadPoolExecutor.new(
+          min_threads: @min_threads,
+          max_threads: @max_threads,
+          max_queue: @max_queue,
+          fallback_policy: :abort,
+          idletime: @idle_time
+        )
+      end
+
+      def ready_for_sync_connectors
+        @scheduler.connector_settings
+                  .select(&:ready_for_sync?)
+                  .inject({}) { |memo, cs| memo.merge(cs.id => cs) }
+      end
+    end
+  end
+end
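A sketch of how the consumer might be wired up (the scheduler object is assumed to expose `connector_settings`, as the constructor requires; the index name is a placeholder):

```ruby
consumer = Core::Jobs::Consumer.new(
  scheduler: scheduler,   # assumed: responds to #connector_settings
  poll_interval: 3,
  max_threads: 5
)

consumer.subscribe!(index_name: '.elastic-connectors-sync-jobs')

# ... on service shutdown:
consumer.shutdown! if consumer.running?
```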
data/lib/core/jobs/producer.rb ADDED
@@ -0,0 +1,26 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+module Core
+  module Jobs
+    class Producer
+      JOB_TYPES = %i(sync).freeze
+
+      class << self
+        def enqueue_job(job_type:, connector_settings:)
+          raise UnsupportedJobType unless JOB_TYPES.include?(job_type)
+          raise ArgumentError unless connector_settings.kind_of?(ConnectorSettings)
+
+          ElasticConnectorActions.create_job(connector_settings: connector_settings)
+        end
+      end
+    end
+
+    class UnsupportedJobType < StandardError; end
+  end
+end
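Usage is a single class-level call; only `:sync` jobs are accepted in this release:

```ruby
# connector id is a placeholder
connector_settings = Core::ConnectorSettings.fetch_by_id('connector-id-1')

Core::Jobs::Producer.enqueue_job(job_type: :sync, connector_settings: connector_settings)
# Raises Core::Jobs::UnsupportedJobType for any other job_type, and
# ArgumentError if connector_settings is not a ConnectorSettings.
```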
data/lib/core/single_scheduler.rb CHANGED
@@ -20,7 +20,7 @@ module Core
 
     def connector_settings
       connector_settings = Core::ConnectorSettings.fetch_by_id(@connector_id)
-      [connector_settings]
+      [connector_settings].compact
     rescue *Utility::AUTHORIZATION_ERRORS => e
       # should be handled by the general scheduler
       raise e
data/lib/core/sync_job_runner.rb CHANGED
@@ -23,9 +23,9 @@ module Core
   class SyncJobRunner
     JOB_REPORTING_INTERVAL = 10
 
-    def initialize(connector_settings)
+    def initialize(connector_settings, job)
       @connector_settings = connector_settings
-      @
+      @sink = Core::Ingestion::EsSink.new(connector_settings.index_name, @connector_settings.request_pipeline)
       @connector_class = Connectors::REGISTRY.connector_class(connector_settings.service_type)
       @sync_finished = false
       @sync_error = nil
@@ -35,6 +35,7 @@ module Core
         :indexed_document_volume => 0,
         :error => nil
       }
+      @job = job
     end
 
     def execute
@@ -47,9 +48,16 @@ module Core
     def do_sync!
       Utility::Logger.info("Claiming a sync job for connector #{@connector_settings.id}.")
 
-
-
-
+      # connector service doesn't support multiple jobs running simultaneously
+      raise Core::JobAlreadyRunningError.new(@connector_settings.id) if @connector_settings.running?
+
+      Core::ElasticConnectorActions.update_connector_last_sync_status(@connector_settings.id, Connectors::SyncStatus::IN_PROGRESS)
+
+      # claim the job
+      @job.make_running!
+
+      job_description = @job.es_source
+      job_id = @job.id
       job_description['_id'] = job_id
 
       unless job_id.present?
@@ -80,12 +88,12 @@ module Core
           document = add_ingest_metadata(document)
           post_process_result = post_processing_engine.process(document)
           if post_process_result.is_include?
-            @
+            @sink.ingest(document)
             incoming_ids << document['id']
           end
 
           if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
-            ElasticConnectorActions.update_sync(job_id, @
+            ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
             reporting_cycle_start = Time.now
           end
         end
@@ -95,15 +103,15 @@ module Core
       Utility::Logger.info("Deleting #{ids_to_delete.size} documents from index #{@connector_settings.index_name}.")
 
       ids_to_delete.each do |id|
-        @
+        @sink.delete(id)
 
         if Time.now - reporting_cycle_start >= JOB_REPORTING_INTERVAL
-          ElasticConnectorActions.update_sync(job_id, @
+          ElasticConnectorActions.update_sync(job_id, @sink.ingestion_stats.merge(:metadata => connector_instance.metadata))
           reporting_cycle_start = Time.now
         end
      end
 
-      @
+      @sink.flush
 
       # We use this mechanism for checking, whether an interrupt (or something else lead to the thread not finishing)
       # occurred as most of the time the main execution thread is interrupted and we miss this Signal/Exception here
@@ -112,7 +120,7 @@ module Core
       @sync_error = e.message
       Utility::ExceptionTracking.log_exception(e)
     ensure
-      stats = @
+      stats = @sink.ingestion_stats
 
       Utility::Logger.debug("Sync stats are: #{stats}")
 
@@ -129,7 +137,7 @@ module Core
       end
 
       unless connector_instance.nil?
-        metadata = @
+        metadata = @sink.ingestion_stats.merge(:metadata => connector_instance.metadata)
         metadata[:total_document_count] = ElasticConnectorActions.document_count(@connector_settings.index_name)
       end
 
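The runner no longer claims work implicitly; it receives a pre-created job and marks it running itself. A sketch of the new calling convention (mirroring what the consumer above does):

```ruby
job = Core::ConnectorJob.pending_jobs(connectors_ids: [connector_settings.id]).first

if job
  runner = Core::SyncJobRunner.new(connector_settings, job)
  runner.execute # raises Core::JobAlreadyRunningError if a sync is already in progress
end
```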
data/lib/utility/error_monitor.rb ADDED
@@ -0,0 +1,108 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+#
+
+# frozen_string_literal: true
+
+require 'time'
+require 'utility/errors'
+require 'utility/exception_tracking'
+
+module Utility
+  class ErrorMonitor
+    class MonitoringError < StandardError
+      attr_accessor :tripped_by
+
+      def initialize(message = nil, tripped_by: nil)
+        super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
+        @tripped_by = tripped_by
+      end
+    end
+
+    class MaxSuccessiveErrorsExceededError < MonitoringError; end
+    class MaxErrorsExceededError < MonitoringError; end
+    class MaxErrorsInWindowExceededError < MonitoringError; end
+
+    attr_reader :total_error_count, :success_count, :consecutive_error_count, :error_queue
+
+    def initialize(
+      max_errors: 1000,
+      max_consecutive_errors: 10,
+      max_error_ratio: 0.15,
+      window_size: 100,
+      error_queue_size: 20
+    )
+      @max_errors = max_errors
+      @max_consecutive_errors = max_consecutive_errors
+      @max_error_ratio = max_error_ratio
+      @window_size = window_size
+      @total_error_count = 0
+      @success_count = 0
+      @consecutive_error_count = 0
+      @window_errors = Array.new(window_size) { false }
+      @window_index = 0
+      @last_error = nil
+      @error_queue_size = error_queue_size
+      @error_queue = []
+    end
+
+    def note_success
+      @consecutive_error_count = 0
+      @success_count += 1
+      increment_window_index
+    end
+
+    def note_error(error, id: Time.now.to_i)
+      stack_trace = Utility::ExceptionTracking.generate_stack_trace(error)
+      error_message = Utility::ExceptionTracking.generate_error_message(error, nil, nil)
+      Utility::Logger.debug("Message id: #{id} - #{error_message}\n#{stack_trace}")
+      @total_error_count += 1
+      @consecutive_error_count += 1
+      @window_errors[@window_index] = true
+      @error_queue << DocumentError.new(error.class.name, error_message, stack_trace, id)
+      @error_queue = @error_queue.drop(1) if @error_queue.size > @error_queue_size
+      increment_window_index
+      @last_error = error
+
+      raise_if_necessary
+    end
+
+    def finalize
+      total_documents = @total_error_count + @success_count
+      if total_documents > 0 && @total_error_count.to_f / total_documents > @max_error_ratio
+        raise_with_last_cause(MaxErrorsInWindowExceededError.new("There were #{@total_error_count} errors out of #{total_documents} total documents", :tripped_by => @last_error))
+      end
+    end
+
+    private
+
+    def raise_if_necessary
+      error =
+        if @consecutive_error_count > @max_consecutive_errors
+          MaxSuccessiveErrorsExceededError.new("Exceeded maximum consecutive errors - saw #{@consecutive_error_count} errors in a row.", :tripped_by => @last_error)
+        elsif @total_error_count > @max_errors
+          MaxErrorsExceededError.new("Exceeded maximum number of errors - saw #{@total_error_count} errors in total.", :tripped_by => @last_error)
+        elsif @window_size > 0 && num_errors_in_window / @window_size > @max_error_ratio
+          MaxErrorsInWindowExceededError.new("Exceeded maximum error ratio of #{@max_error_ratio}. Of the last #{@window_size} documents, #{num_errors_in_window} had errors", :tripped_by => @last_error)
+        end
+
+      raise_with_last_cause(error) if error
+    end
+
+    def num_errors_in_window
+      @window_errors.count(&:itself).to_f
+    end
+
+    def increment_window_index
+      @window_index = (@window_index + 1) % @window_size
+    end
+
+    def raise_with_last_cause(error)
+      raise @last_error
+    rescue StandardError
+      raise error
+    end
+  end
+end
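The monitor's contract is unchanged from its previous home in `utility/errors.rb`: feed it successes and failures, and it raises once a threshold trips. A minimal sketch (`docs` and `ingest` are hypothetical stand-ins for a sync loop):

```ruby
monitor = Utility::ErrorMonitor.new(max_consecutive_errors: 10, max_error_ratio: 0.15)

docs.each do |doc|
  ingest(doc) # hypothetical per-document operation
  monitor.note_success
rescue StandardError => e
  # Raises MaxSuccessiveErrorsExceededError, MaxErrorsExceededError or
  # MaxErrorsInWindowExceededError once the corresponding threshold trips.
  monitor.note_error(e, id: doc['id'])
end

monitor.finalize # final whole-run error-ratio check
```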
data/lib/utility/errors.rb CHANGED
@@ -60,18 +60,6 @@ module Utility
   class JobDocumentLimitError < StandardError; end
   class JobClaimingError < StandardError; end
 
-  class MonitoringError < StandardError
-    attr_accessor :tripped_by
-
-    def initialize(message = nil, tripped_by: nil)
-      super("#{message}#{tripped_by.present? ? " Tripped by - #{tripped_by.class}: #{tripped_by.message}" : ''}")
-      @tripped_by = tripped_by
-    end
-  end
-  class MaxSuccessiveErrorsExceededError < MonitoringError; end
-  class MaxErrorsExceededError < MonitoringError; end
-  class MaxErrorsInWindowExceededError < MonitoringError; end
-
   class JobSyncNotPossibleYetError < StandardError
     attr_accessor :sync_will_be_possible_at
 