webhookdb 1.3.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. checksums.yaml +4 -4
  2. data/admin-dist/assets/{index-6aebf805.js → index-9306dd28.js} +39 -39
  3. data/admin-dist/index.html +1 -1
  4. data/data/messages/templates/errors/generic_backfill.email.liquid +30 -0
  5. data/data/messages/templates/errors/icalendar_fetch.email.liquid +8 -2
  6. data/data/messages/templates/specs/with_fields.email.liquid +6 -0
  7. data/db/migrations/026_undo_integration_backfill_cursor.rb +2 -0
  8. data/db/migrations/032_remove_db_defaults.rb +2 -0
  9. data/db/migrations/043_text_search.rb +2 -0
  10. data/db/migrations/045_system_log.rb +15 -0
  11. data/db/migrations/046_indices.rb +14 -0
  12. data/db/migrations/047_sync_parallelism.rb +9 -0
  13. data/db/migrations/048_sync_stats.rb +9 -0
  14. data/db/migrations/049_error_handlers.rb +18 -0
  15. data/db/migrations/050_logged_webhook_indices.rb +25 -0
  16. data/db/migrations/051_partitioning.rb +9 -0
  17. data/integration/async_spec.rb +0 -2
  18. data/integration/service_integrations_spec.rb +0 -2
  19. data/lib/amigo/durable_job.rb +2 -2
  20. data/lib/amigo/job_in_context.rb +12 -0
  21. data/lib/webhookdb/admin.rb +6 -0
  22. data/lib/webhookdb/admin_api/data_provider.rb +1 -0
  23. data/lib/webhookdb/admin_api/entities.rb +8 -0
  24. data/lib/webhookdb/aggregate_result.rb +1 -1
  25. data/lib/webhookdb/api/entities.rb +6 -2
  26. data/lib/webhookdb/api/error_handlers.rb +104 -0
  27. data/lib/webhookdb/api/helpers.rb +25 -1
  28. data/lib/webhookdb/api/icalproxy.rb +22 -0
  29. data/lib/webhookdb/api/install.rb +2 -1
  30. data/lib/webhookdb/api/organizations.rb +6 -0
  31. data/lib/webhookdb/api/saved_queries.rb +1 -0
  32. data/lib/webhookdb/api/saved_views.rb +1 -0
  33. data/lib/webhookdb/api/service_integrations.rb +2 -1
  34. data/lib/webhookdb/api/sync_targets.rb +1 -1
  35. data/lib/webhookdb/api/system.rb +5 -0
  36. data/lib/webhookdb/api/webhook_subscriptions.rb +1 -0
  37. data/lib/webhookdb/api.rb +4 -1
  38. data/lib/webhookdb/apps.rb +4 -0
  39. data/lib/webhookdb/async/autoscaler.rb +10 -0
  40. data/lib/webhookdb/async/job.rb +4 -0
  41. data/lib/webhookdb/async/scheduled_job.rb +4 -0
  42. data/lib/webhookdb/async.rb +2 -0
  43. data/lib/webhookdb/backfiller.rb +17 -4
  44. data/lib/webhookdb/concurrent.rb +96 -0
  45. data/lib/webhookdb/connection_cache.rb +57 -10
  46. data/lib/webhookdb/console.rb +1 -1
  47. data/lib/webhookdb/customer/reset_code.rb +1 -1
  48. data/lib/webhookdb/customer.rb +5 -4
  49. data/lib/webhookdb/database_document.rb +1 -1
  50. data/lib/webhookdb/db_adapter/default_sql.rb +1 -14
  51. data/lib/webhookdb/db_adapter/partition.rb +14 -0
  52. data/lib/webhookdb/db_adapter/partitioning.rb +8 -0
  53. data/lib/webhookdb/db_adapter/pg.rb +77 -5
  54. data/lib/webhookdb/db_adapter/snowflake.rb +15 -6
  55. data/lib/webhookdb/db_adapter.rb +25 -3
  56. data/lib/webhookdb/dbutil.rb +2 -0
  57. data/lib/webhookdb/errors.rb +34 -0
  58. data/lib/webhookdb/fixtures/logged_webhooks.rb +4 -0
  59. data/lib/webhookdb/fixtures/organization_error_handlers.rb +20 -0
  60. data/lib/webhookdb/http.rb +30 -16
  61. data/lib/webhookdb/icalendar.rb +30 -9
  62. data/lib/webhookdb/jobs/amigo_test_jobs.rb +1 -1
  63. data/lib/webhookdb/jobs/backfill.rb +21 -25
  64. data/lib/webhookdb/jobs/create_mirror_table.rb +3 -4
  65. data/lib/webhookdb/jobs/deprecated_jobs.rb +3 -0
  66. data/lib/webhookdb/jobs/emailer.rb +2 -1
  67. data/lib/webhookdb/jobs/front_signalwire_message_channel_sync_inbound.rb +15 -0
  68. data/lib/webhookdb/jobs/icalendar_delete_stale_cancelled_events.rb +7 -2
  69. data/lib/webhookdb/jobs/icalendar_enqueue_syncs.rb +74 -11
  70. data/lib/webhookdb/jobs/icalendar_enqueue_syncs_for_urls.rb +22 -0
  71. data/lib/webhookdb/jobs/icalendar_sync.rb +21 -9
  72. data/lib/webhookdb/jobs/increase_event_handler.rb +3 -2
  73. data/lib/webhookdb/jobs/{logged_webhook_replay.rb → logged_webhooks_replay.rb} +5 -3
  74. data/lib/webhookdb/jobs/message_dispatched.rb +1 -0
  75. data/lib/webhookdb/jobs/model_event_system_log_tracker.rb +112 -0
  76. data/lib/webhookdb/jobs/monitor_metrics.rb +29 -0
  77. data/lib/webhookdb/jobs/organization_database_migration_notify.rb +32 -0
  78. data/lib/webhookdb/jobs/organization_database_migration_run.rb +4 -6
  79. data/lib/webhookdb/jobs/organization_error_handler_dispatch.rb +26 -0
  80. data/lib/webhookdb/jobs/prepare_database_connections.rb +1 -0
  81. data/lib/webhookdb/jobs/process_webhook.rb +11 -12
  82. data/lib/webhookdb/jobs/renew_watch_channel.rb +10 -10
  83. data/lib/webhookdb/jobs/replication_migration.rb +5 -2
  84. data/lib/webhookdb/jobs/reset_code_create_dispatch.rb +1 -2
  85. data/lib/webhookdb/jobs/scheduled_backfills.rb +2 -2
  86. data/lib/webhookdb/jobs/send_invite.rb +3 -2
  87. data/lib/webhookdb/jobs/send_test_webhook.rb +1 -3
  88. data/lib/webhookdb/jobs/send_webhook.rb +4 -5
  89. data/lib/webhookdb/jobs/stale_row_deleter.rb +31 -0
  90. data/lib/webhookdb/jobs/sync_target_enqueue_scheduled.rb +3 -0
  91. data/lib/webhookdb/jobs/sync_target_run_sync.rb +9 -15
  92. data/lib/webhookdb/jobs/{webhook_subscription_delivery_attempt.rb → webhook_subscription_delivery_event.rb} +5 -8
  93. data/lib/webhookdb/liquid/expose.rb +1 -1
  94. data/lib/webhookdb/liquid/filters.rb +1 -1
  95. data/lib/webhookdb/liquid/partial.rb +2 -2
  96. data/lib/webhookdb/logged_webhook/resilient.rb +3 -3
  97. data/lib/webhookdb/logged_webhook.rb +16 -2
  98. data/lib/webhookdb/message/email_transport.rb +1 -1
  99. data/lib/webhookdb/message/transport.rb +1 -1
  100. data/lib/webhookdb/message.rb +55 -4
  101. data/lib/webhookdb/messages/error_generic_backfill.rb +47 -0
  102. data/lib/webhookdb/messages/error_icalendar_fetch.rb +5 -0
  103. data/lib/webhookdb/messages/error_signalwire_send_sms.rb +2 -0
  104. data/lib/webhookdb/messages/specs.rb +16 -0
  105. data/lib/webhookdb/organization/alerting.rb +56 -6
  106. data/lib/webhookdb/organization/database_migration.rb +2 -2
  107. data/lib/webhookdb/organization/db_builder.rb +5 -4
  108. data/lib/webhookdb/organization/error_handler.rb +141 -0
  109. data/lib/webhookdb/organization.rb +76 -10
  110. data/lib/webhookdb/postgres/model.rb +1 -0
  111. data/lib/webhookdb/postgres/model_utilities.rb +2 -0
  112. data/lib/webhookdb/postgres.rb +3 -4
  113. data/lib/webhookdb/replicator/base.rb +202 -68
  114. data/lib/webhookdb/replicator/base_stale_row_deleter.rb +165 -0
  115. data/lib/webhookdb/replicator/column.rb +2 -0
  116. data/lib/webhookdb/replicator/email_octopus_contact_v1.rb +0 -1
  117. data/lib/webhookdb/replicator/fake.rb +106 -88
  118. data/lib/webhookdb/replicator/front_signalwire_message_channel_app_v1.rb +131 -61
  119. data/lib/webhookdb/replicator/github_repo_v1_mixin.rb +17 -0
  120. data/lib/webhookdb/replicator/icalendar_calendar_v1.rb +197 -32
  121. data/lib/webhookdb/replicator/icalendar_event_v1.rb +20 -44
  122. data/lib/webhookdb/replicator/icalendar_event_v1_partitioned.rb +33 -0
  123. data/lib/webhookdb/replicator/intercom_contact_v1.rb +1 -0
  124. data/lib/webhookdb/replicator/intercom_conversation_v1.rb +1 -0
  125. data/lib/webhookdb/replicator/intercom_v1_mixin.rb +49 -6
  126. data/lib/webhookdb/replicator/partitionable_mixin.rb +116 -0
  127. data/lib/webhookdb/replicator/shopify_v1_mixin.rb +1 -1
  128. data/lib/webhookdb/replicator/signalwire_message_v1.rb +31 -1
  129. data/lib/webhookdb/replicator/sponsy_v1_mixin.rb +1 -1
  130. data/lib/webhookdb/replicator/transistor_episode_stats_v1.rb +0 -1
  131. data/lib/webhookdb/replicator/transistor_episode_v1.rb +11 -5
  132. data/lib/webhookdb/replicator/webhook_request.rb +8 -0
  133. data/lib/webhookdb/replicator.rb +6 -3
  134. data/lib/webhookdb/service/helpers.rb +4 -0
  135. data/lib/webhookdb/service/middleware.rb +6 -2
  136. data/lib/webhookdb/service/view_api.rb +1 -1
  137. data/lib/webhookdb/service.rb +10 -10
  138. data/lib/webhookdb/service_integration.rb +19 -1
  139. data/lib/webhookdb/signalwire.rb +1 -1
  140. data/lib/webhookdb/spec_helpers/async.rb +0 -4
  141. data/lib/webhookdb/spec_helpers/sentry.rb +32 -0
  142. data/lib/webhookdb/spec_helpers/shared_examples_for_replicators.rb +239 -64
  143. data/lib/webhookdb/spec_helpers.rb +1 -0
  144. data/lib/webhookdb/sync_target.rb +202 -34
  145. data/lib/webhookdb/system_log_event.rb +9 -0
  146. data/lib/webhookdb/tasks/admin.rb +1 -1
  147. data/lib/webhookdb/tasks/annotate.rb +1 -1
  148. data/lib/webhookdb/tasks/db.rb +13 -1
  149. data/lib/webhookdb/tasks/docs.rb +1 -1
  150. data/lib/webhookdb/tasks/fixture.rb +1 -1
  151. data/lib/webhookdb/tasks/message.rb +1 -1
  152. data/lib/webhookdb/tasks/regress.rb +1 -1
  153. data/lib/webhookdb/tasks/release.rb +1 -1
  154. data/lib/webhookdb/tasks/sidekiq.rb +1 -1
  155. data/lib/webhookdb/tasks/specs.rb +1 -1
  156. data/lib/webhookdb/version.rb +1 -1
  157. data/lib/webhookdb/webhook_subscription.rb +3 -4
  158. data/lib/webhookdb.rb +34 -8
  159. metadata +114 -64
  160. data/lib/webhookdb/jobs/customer_created_notify_internal.rb +0 -22
  161. data/lib/webhookdb/jobs/organization_database_migration_notify_finished.rb +0 -21
  162. data/lib/webhookdb/jobs/organization_database_migration_notify_started.rb +0 -21
  163. /data/lib/webhookdb/jobs/{logged_webhook_resilient_replay.rb → logged_webhooks_resilient_replay.rb} +0 -0
  164. /data/lib/webhookdb/jobs/{webhook_resource_notify_integrations.rb → webhookdb_resource_notify_integrations.rb} +0 -0
@@ -62,7 +62,7 @@ class Webhookdb::Replicator::Base
   # and the arguments used to upsert it (arguments to upsert_webhook),
   # and should return the body string to respond back with.
   #
-  # @param [Hash] upserted
+  # @param [Hash,Array] upserted
   # @param [Webhookdb::Replicator::WebhookRequest] request
   # @return [String]
   def synchronous_processing_response_body(upserted:, request:)
@@ -318,10 +318,12 @@ for information on how to refresh data.)
   # Find a dependent service integration with the given service name.
   # If none are found, return nil. If multiple are found, raise,
   # as this should only be used for automatically managed integrations.
+  # @param service_name [String,Array<String>]
   # @return [Webhookdb::ServiceIntegration,nil]
   def find_dependent(service_name)
-    sints = self.service_integration.dependents.filter { |si| si.service_name == service_name }
-    raise Webhookdb::InvalidPrecondition, "there are multiple #{service_name} integrations in dependents" if
+    names = service_name.respond_to?(:to_ary) ? service_name : [service_name]
+    sints = self.service_integration.dependents.filter { |si| names.include?(si.service_name) }
+    raise Webhookdb::InvalidPrecondition, "there are multiple #{names.join('/')} integrations in dependents" if
      sints.length > 1
     return sints.first
   end
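find_dependent now accepts either a single name or a list. A minimal sketch of both call forms, using service names from this release (replicator stands in for any Webhookdb::Replicator::Base instance):

    # Matches only the one service name:
    replicator.find_dependent("icalendar_event_v1")
    # Matches whichever of the listed dependents exists (at most one may match):
    replicator.find_dependent(["icalendar_event_v1", "icalendar_event_v1_partitioned"])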
@@ -356,7 +358,9 @@ for information on how to refresh data.)
     columns << self.data_column
     adapter = Webhookdb::DBAdapter::PG.new
     result = Webhookdb::Replicator::SchemaModification.new
-    result.transaction_statements << adapter.create_table_sql(table, columns, if_not_exists:)
+    create_table = adapter.create_table_sql(table, columns, if_not_exists:, partition: self.partitioning)
+    result.transaction_statements << create_table
+    result.transaction_statements.concat(self.create_table_partitions(adapter))
     self.indices(table).each do |dbindex|
       result.transaction_statements << adapter.create_index_sql(dbindex, concurrently: false)
     end
@@ -364,33 +368,69 @@ for information on how to refresh data.)
     return result
   end

+  # True if the replicator uses partitioning.
+  def partition? = false
+  # Non-nil only if +partition?+ is true.
+  # @return [Webhookdb::DBAdapter::Partitioning,nil]
+  def partitioning = nil
+
+  # Return the partitions belonging to the table.
+  # Return an empty array if this replicator is not partitioned.
+  # @return [Array<Webhookdb::DBAdapter::Partition>]
+  def existing_partitions(_db)
+    raise NotImplementedError if self.partition?
+    return []
+  end
+
+  def create_table_partitions(adapter)
+    return [] unless self.partition?
+    # We only need create_table partitions when we create the table.
+    # Range partitions would be created on demand, when inserting rows and the partition doesn't exist.
+    return [] unless self.partitioning.by == Webhookdb::DBAdapter::Partitioning::HASH
+
+    max_partition = self.service_integration.partition_value
+    raise Webhookdb::InvalidPrecondition, "partition value must be positive" unless max_partition.positive?
+    stmts = (0...max_partition).map do |i|
+      adapter.create_hash_partition_sql(self.dbadapter_table, max_partition, i)
+    end
+    return stmts
+  end
+
   # We need to give indices a persistent name, unique across the schema,
   # since multiple indices within a schema cannot share a name.
   #
   # Note that in certain RDBMS (Postgres) index names cannot exceed a certain length;
   # Postgres will silently truncate them. This can result in an index not being created
-  # if it shares the same name as another index and we use 'CREATE INDEX IF NOT EXISTS.'
+  # if it shares the same name as another index, and we use 'CREATE INDEX IF NOT EXISTS.'
   #
   # To avoid this, if the generated name exceeds a certain size, an md5 hash of the column names is used.
   #
   # @param columns [Array<Webhookdb::DBAdapter::Column, Webhookdb::Replicator::Column>] Must respond to :name.
+  # @param identifier [String,nil] Use this instead of a combination of column names.
+  #   Only use this where multiple indices are needed for the same columns, but something like the 'where'
+  #   condition is different.
   # @return [String]
-  protected def index_name(columns)
+  protected def index_name(columns, identifier: nil)
     raise Webhookdb::InvalidPrecondition, "sint needs an opaque id" if self.service_integration.opaque_id.blank?
     colnames = columns.map(&:name).join("_")
     opaque_id = self.service_integration.opaque_id
     # Handle old IDs without the leading 'svi_'.
     opaque_id = "idx#{opaque_id}" if /\d/.match?(opaque_id[0])
-    name = "#{opaque_id}_#{colnames}_idx"
-    if name.size > MAX_INDEX_NAME_LENGTH
-      # We don't have the 32 extra chars for a full md5 hash.
-      # We can't convert to Base64 or whatever, since we don't want to depend on case sensitivity.
-      # So just lop off a few characters (normally 2) from the end of the md5.
-      # The collision space is so small (some combination of column names would need to have the
-      # same md5, which is unfathomable), we're not really worried about it.
-      colnames_md5 = Digest::MD5.hexdigest(colnames)
-      available_chars = MAX_INDEX_NAME_LENGTH - "#{opaque_id}__idx".size
-      name = "#{opaque_id}_#{colnames_md5[...available_chars]}_idx"
+
+    if identifier
+      name = "#{opaque_id}_#{identifier}_idx"
+    else
+      name = "#{opaque_id}_#{colnames}_idx"
+      if name.size > MAX_INDEX_NAME_LENGTH
+        # We don't have the 32 extra chars for a full md5 hash.
+        # We can't convert to Base64 or whatever, since we don't want to depend on case sensitivity.
+        # So just lop off a few characters (normally 2) from the end of the md5.
+        # The collision space is so small (some combination of column names would need to have the
+        # same md5, which is unfathomable), we're not really worried about it.
+        colnames_md5 = Digest::MD5.hexdigest(colnames)
+        available_chars = MAX_INDEX_NAME_LENGTH - "#{opaque_id}__idx".size
+        name = "#{opaque_id}_#{colnames_md5[...available_chars]}_idx"
+      end
     end
     raise Webhookdb::InvariantViolation, "index names cannot exceed 63 chars, got #{name.size} in '#{name}'" if
       name.size > 63
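A partitioned replicator opts in by overriding the hooks added above. A minimal sketch, assuming Partitioning is constructed with by: and column: keywords (the class name and partition-key column are illustrative; see icalendar_event_v1_partitioned.rb and partitionable_mixin.rb in the file list for the real implementations):

    class MyPartitionedReplicator < Webhookdb::Replicator::Base
      def partition? = true

      # Hash-partition on an integer hash of the remote id; the number of
      # hash partitions comes from service_integration.partition_value.
      def partitioning
        return Webhookdb::DBAdapter::Partitioning.new(
          by: Webhookdb::DBAdapter::Partitioning::HASH,
          column: :external_id_hash,
        )
      end
    end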
@@ -406,7 +446,12 @@ for information on how to refresh data.)

   # @return [Webhookdb::DBAdapter::Column]
   def remote_key_column
-    return self._remote_key_column.to_dbadapter(unique: true, nullable: false)
+    c = self._remote_key_column
+    if c.index?
+      msg = "_remote_key_column index:true should not be set, since it automatically gets a unique index"
+      Kernel.warn msg
+    end
+    return c.to_dbadapter(unique: true, nullable: false, index: false)
   end

   # @return [Webhookdb::DBAdapter::Column]
@@ -465,6 +510,9 @@ for information on how to refresh data.)
   # Each integration needs a single remote key, like the Shopify order id for shopify orders,
   # or sid for Twilio resources. This column must be unique for the table, like a primary key.
   #
+  # NOTE: Do not set index:true. The remote key column must always be unique,
+  # so it gets a unique index automatically.
+  #
   # @abstract
   # @return [Webhookdb::Replicator::Column]
   def _remote_key_column
@@ -495,9 +543,16 @@ for information on how to refresh data.)
     end
     self._extra_index_specs.each do |spec|
       targets = spec.columns.map { |n| dba_cols_by_name.fetch(n) }
-      idx_name = self.index_name(targets)
+      idx_name = self.index_name(targets, identifier: spec.identifier)
       result << Webhookdb::DBAdapter::Index.new(name: idx_name.to_sym, table:, targets:, where: spec.where)
     end
+    index_names = result.map(&:name)
+    if (dupes = index_names.find_all.with_index { |n, idx| idx != index_names.rindex(n) }).any?
+      msg = "Duplicate index names detected. Use the 'name' attribute to differentiate: " +
+        dupes.map(&:to_s).join(", ")
+      raise Webhookdb::Replicator::BrokenSpecification, msg
+    end
+
     return result
   end

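The new identifier: keyword exists so two indices over the same columns, differing only in something like their where clause, get distinct names instead of colliding. A sketch of an _extra_index_specs override that needs it (the IndexSpec keywords are assumed from the spec fields referenced above):

    def _extra_index_specs
      return [
        Webhookdb::Replicator::IndexSpec.new(
          columns: [:start_at], where: Sequel[status: "confirmed"], identifier: "confirmed_start",
        ),
        Webhookdb::Replicator::IndexSpec.new(
          columns: [:start_at], where: Sequel[status: "cancelled"], identifier: "cancelled_start",
        ),
      ]
    end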
@@ -520,7 +575,7 @@ for information on how to refresh data.)

   # @return [Webhookdb::Replicator::SchemaModification]
   def ensure_all_columns_modification
-    existing_cols, existing_indices = nil
+    existing_cols, existing_indices, existing_partitions = nil
     max_pk = 0
     sint = self.service_integration
     self.admin_dataset do |ds|
@@ -531,6 +586,7 @@
         tablename: sint.table_name,
       ).select_map(:indexname).to_set
       max_pk = ds.max(:pk) || 0
+      existing_partitions = self.existing_partitions(ds.db)
     end
     adapter = Webhookdb::DBAdapter::PG.new
     table = self.dbadapter_table
@@ -577,7 +633,9 @@ for information on how to refresh data.)
     # Add missing indices. This should happen AFTER the UPDATE calls so the UPDATEs don't have to update indices.
     self.indices(table).map do |index|
       next if existing_indices.include?(index.name.to_s)
-      result.nontransaction_statements << adapter.create_index_sql(index, concurrently: true)
+      result.nontransaction_statements.concat(
+        adapter.create_index_sqls(index, concurrently: true, partitions: existing_partitions),
+      )
     end

     result.application_database_statements << sint.ensure_sequence_sql if self.requires_sequence?
@@ -641,6 +699,7 @@ for information on how to refresh data.)
   # like when we have to take different action based on a request method.
   #
   # @param body [Hash]
+  # @return [Array,Hash] The inserted row, or an array of inserted rows if many.
   def upsert_webhook_body(body, **kw)
     return self.upsert_webhook(Webhookdb::Replicator::WebhookRequest.new(body:), **kw)
   end
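For reference, a caller sees that return shape like so (the payload is illustrative):

    # Returns the upserted row hash, or an array of row hashes if the
    # webhook body contained many resources.
    row_or_rows = replicator.upsert_webhook_body({"id" => "evt_123", "status" => "confirmed"})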
@@ -649,10 +708,14 @@ for information on how to refresh data.)
   # NOT a Rack::Request.
   #
   # @param [Webhookdb::Replicator::WebhookRequest] request
+  # @return [Array,Hash] The inserted row, or an array of inserted rows if many.
   def upsert_webhook(request, **kw)
     return self._upsert_webhook(request, **kw)
+  rescue Amigo::Retry::Error
+    # Do not log this since it's expected/handled by Amigo
+    raise
   rescue StandardError => e
-    self.logger.error("upsert_webhook_error", request: request.as_json, error: e)
+    self.logger.error("upsert_webhook_error", {request: request.as_json}, e)
     raise
   end

@@ -661,9 +724,23 @@ for information on how to refresh data.)
   #
   # @param request [Webhookdb::Replicator::WebhookRequest]
   # @param upsert [Boolean] If false, just return what would be upserted.
+  # @return [Array,Hash] The inserted row, or an array of inserted rows if many.
   def _upsert_webhook(request, upsert: true)
-    resource, event = self._resource_and_event(request)
-    return nil if resource.nil?
+    resource_or_list, event = self._resource_and_event(request)
+    return nil if resource_or_list.nil?
+    if resource_or_list.is_a?(Array)
+      unless event.nil?
+        msg = "resource_and_event cannot return an array of resources with a non-nil event"
+        raise Webhookdb::InvalidPostcondition, msg
+      end
+      return resource_or_list.map do |resource|
+        self._upsert_webhook_single_resource(request, resource:, event:, upsert:)
+      end
+    end
+    return self._upsert_webhook_single_resource(request, resource: resource_or_list, event:, upsert:)
+  end
+
+  def _upsert_webhook_single_resource(request, resource:, event:, upsert:)
     enrichment = self._fetch_enrichment(resource, event, request)
     prepared = self._prepare_for_insert(resource, event, request, enrichment)
     raise Webhookdb::InvalidPostcondition if prepared.key?(:data)
@@ -673,12 +750,11 @@ for information on how to refresh data.)
     inserting[:enrichment] = self._to_json(enrichment) if self._store_enrichment_body?
     inserting.merge!(prepared)
     return inserting unless upsert
-    remote_key_col = self._remote_key_column
     updating = self._upsert_update_expr(inserting, enrichment:)
     update_where = self._update_where_expr
     upserted_rows = self.admin_dataset(timeout: :fast) do |ds|
       ds.insert_conflict(
-        target: remote_key_col.name,
+        target: self._upsert_conflict_target,
         update: updating,
         update_where:,
       ).insert(inserting)
@@ -689,6 +765,12 @@ for information on how to refresh data.)
     return inserting
   end

+  # The target for ON CONFLICT. Usually the remote key column name,
+  # except if the remote id is a compound unique index, like for partitioned tables.
+  # Can be a symbol, array of symbols representing the column names, a +Sequel.lit+, etc.
+  # See +Sequel::Dataset.insert_conflict+ :target option for details.
+  def _upsert_conflict_target = self._remote_key_column.name
+
   # The NULL ASCII character (\u0000), when present in a string ("\u0000"),
   # and then encoded into JSON ("\\u0000") is invalid in PG JSONB- its strings cannot contain NULLs
   # (note that JSONB does not store the encoded string verbatim, it parses it into PG types, and a PG string
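A replicator whose unique constraint is compound (as partitioned tables require, since the partition key must be part of any unique index) would override the new hook. A sketch with illustrative column names:

    # ON CONFLICT must target the compound unique index, not just the remote key.
    def _upsert_conflict_target = [:external_id, :external_id_hash]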
@@ -793,7 +875,7 @@ for information on how to refresh data.)
   #
   # @abstract
   # @param [Webhookdb::Replicator::WebhookRequest] request
-  # @return [Array<Hash>,nil]
+  # @return [Array<Hash,Array>,nil]
   def _resource_and_event(request)
     raise NotImplementedError
   end
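With the widened return type, a batch-style replicator can hand back a list of resources; the event must then be nil, per _upsert_webhook above. A sketch with an assumed payload shape:

    def _resource_and_event(request)
      body = request.body
      # A batch payload upserts many rows at once; each item is handled by
      # _upsert_webhook_single_resource.
      return body["items"], nil if body["items"].is_a?(Array)
      return body, nil
    end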
@@ -903,10 +985,10 @@ for information on how to refresh data.)
   # - The table OID for this replicator
   # - The given key
   #
-  # Note this this establishes a new DB connection for the advisory lock;
+  # Note this establishes a new DB connection for the advisory lock;
   # we have had issues with advisory locks on reused connections,
   # and this is safer than having a lock that is never released.
-  protected def with_advisory_lock(key, &)
+  def with_advisory_lock(key, &)
     url = self.service_integration.organization.admin_connection_url_raw
     got = nil
     Webhookdb::Dbutil.borrow_conn(url) do |conn|
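Since with_advisory_lock is now public, callers outside the replicator (jobs, for instance) can serialize work per integration. A minimal sketch (the key value is arbitrary):

    replicator.with_advisory_lock(5500) do
      # Only one process at a time enters here for this replicator's table:
      # the lock identity is the table OID plus the given key.
    end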
@@ -966,7 +1048,7 @@ for information on how to refresh data.)
   rescue TypeError, NoMethodError => e
     # if we don't incur an HTTP error, but do incur an Error due to differences in the shapes of anticipated
     # response data in the `fetch_backfill_page` function, we can assume that the credentials are okay
-    self.logger.info "verify_backfill_credentials_expected_failure", error: e
+    self.logger.info "verify_backfill_credentials_expected_failure", e
     return CredentialVerificationResult.new(verified: true, message: "")
   end
   return CredentialVerificationResult.new(verified: true, message: "")
@@ -999,42 +1081,18 @@ for information on how to refresh data.)
     job.update(started_at: Time.now)

     backfillers = self._backfillers(**job.criteria.symbolize_keys)
-    if self._parallel_backfill && self._parallel_backfill > 1
-      # Create a dedicated threadpool for these backfillers,
-      # with max parallelism determined by the replicator.
-      pool = Concurrent::FixedThreadPool.new(self._parallel_backfill)
-      # Record any errors that occur, since they won't raise otherwise.
-      # Initialize a sized array to avoid any potential race conditions (though GIL should make it not an issue?).
-      errors = Array.new(backfillers.size)
-      backfillers.each_with_index do |bf, idx|
-        pool.post do
-          bf.backfill(last_backfilled)
-        rescue StandardError => e
-          errors[idx] = e
-        end
-      end
-      # We've enqueued all backfillers; do not accept anymore work.
-      pool.shutdown
-      loop do
-        # We want to stop early if we find an error, so check for errors every 10 seconds.
-        completed = pool.wait_for_termination(10)
-        first_error = errors.find { |e| !e.nil? }
-        if first_error.nil?
-          # No error, and wait_for_termination returned true, so all work is done.
-          break if completed
-          # No error, but work is still going on, so loop again.
-          next
-        end
-        # We have an error; don't run any more backfillers.
-        pool.kill
-        # Wait for all ongoing backfills before raising.
-        pool.wait_for_termination
-        raise first_error
+    begin
+      if self._parallel_backfill && self._parallel_backfill > 1
+        _do_parallel_backfill(backfillers, last_backfilled)
+      else
+        _do_serial_backfill(backfillers, last_backfilled)
      end
-    else
-      backfillers.each do |backfiller|
-        backfiller.backfill(last_backfilled)
+    rescue StandardError => e
+      if self.on_backfill_error(e) == true
+        job.update(finished_at: Time.now)
+        return
      end
+      raise e
    end

     sint.update(last_backfilled_at: new_last_backfilled) if job.incremental?
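The parallel path remains opt-in: per the docstring further below, a replicator enables it by returning a degree of parallelism (the value here is illustrative):

    # Run up to four backfillers concurrently for this replicator.
    def _parallel_backfill = 4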
@@ -1042,6 +1100,54 @@ for information on how to refresh data.)
     job.enqueue_children
   end

+  protected def _do_parallel_backfill(backfillers, last_backfilled)
+    # Create a dedicated threadpool for these backfillers,
+    # with max parallelism determined by the replicator.
+    pool = Concurrent::FixedThreadPool.new(self._parallel_backfill)
+    # Record any errors that occur, since they won't raise otherwise.
+    # Initialize a sized array to avoid any potential race conditions (though GIL should make it not an issue?).
+    errors = Array.new(backfillers.size)
+    backfillers.each_with_index do |bf, idx|
+      pool.post do
+        bf.backfill(last_backfilled)
+      rescue StandardError => e
+        errors[idx] = e
+      end
+    end
+    # We've enqueued all backfillers; do not accept any more work.
+    pool.shutdown
+    loop do
+      # We want to stop early if we find an error, so check for errors every 10 seconds.
+      completed = pool.wait_for_termination(10)
+      first_error = errors.find { |e| !e.nil? }
+      if first_error.nil?
+        # No error, and wait_for_termination returned true, so all work is done.
+        break if completed
+        # No error, but work is still going on, so loop again.
+        next
+      end
+      # We have an error; don't run any more backfillers.
+      pool.kill
+      # Wait for all ongoing backfills before raising.
+      pool.wait_for_termination
+      raise first_error
+    end
+  end
+
+  protected def _do_serial_backfill(backfillers, last_backfilled)
+    backfillers.each do |backfiller|
+      backfiller.backfill(last_backfilled)
+    end
+  end
+
+  # Called when the #backfill method errors.
+  # This can do something like dispatch a developer alert.
+  # Return +true+ to swallow the error and mark the backfill job finished;
+  # otherwise the original exception is raised.
+  # By default, this method noops, so the original exception is raised.
+  # @param e [Exception]
+  def on_backfill_error(e) = nil
+
   # If this replicator supports backfilling in parallel (running multiple backfillers at a time),
   # return the degree of parallelism (or nil if not running in parallel).
   # We leave parallelism up to the replicator, not CPU count, since most work
@@ -1163,6 +1269,34 @@ or leave blank to choose the first option.
     return self._webhook_endpoint
   end

+  # Avoid writes under the following conditions:
+  #
+  # - A table lock is taken on the table
+  # - A vacuum is in progress on the table
+  #
+  # Of course, in most situations we want to write anyway,
+  # but there are some cases (lower-priority replicators for example)
+  # where we can reschedule the job to happen in the future instead.
+  def avoid_writes?
+    # We will need to handle this differently when not under Postgres, but for now,
+    # just assume Postgres.
+    # Find the admin URL for the organization's server (NOT the organization admin url, it can't see system processes).
+    # Then check for 1) vacuums in progress, 2) locks.
+    self.service_integration.organization.readonly_connection do |db|
+      count = db[:pg_locks].
+        join(:pg_class, {oid: :relation}).
+        join(:pg_namespace, {oid: :relnamespace}).
+        where(
+          locktype: "relation",
+          nspname: self.service_integration.organization.replication_schema,
+          relname: self.service_integration.table_name,
+          mode: ["ShareUpdateExclusiveLock", "ExclusiveLock", "AccessExclusiveLock"],
+        ).limit(1).count
+      return true if count&.positive?
+    end
+    return false
+  end
+
   protected def _webhook_endpoint
     return self.service_integration.unauthed_webhook_endpoint
   end
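A sketch of how a lower-priority job might consult avoid_writes? to defer itself; the job class and rescheduling call (Sidekiq's perform_in) are assumptions, not taken from the gem:

    class MyLowPriorityReplicationJob
      include Sidekiq::Job

      def perform(sint_id)
        repl = Webhookdb::ServiceIntegration[sint_id].replicator
        if repl.avoid_writes?
          # A table lock or vacuum is active; retry in a minute instead of contending.
          self.class.perform_in(60, sint_id)
          return
        end
        # ...proceed with the write-heavy work...
      end
    end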
New file: data/lib/webhookdb/replicator/base_stale_row_deleter.rb
@@ -0,0 +1,165 @@
+# frozen_string_literal: true
+
+# Delete stale rows (like cancelled calendar events) not updated (row_updated_at or whatever column)
+# in the window between +stale_at+ back to +lookback_window+.
+# This avoids endlessly adding to a table where we expect rows to become stale over time.
+class Webhookdb::Replicator::BaseStaleRowDeleter
+  # @return [Webhookdb::Replicator::Base]
+  attr_reader :replicator
+
+  def initialize(replicator)
+    @replicator = replicator
+  end
+
+  # When a row is considered 'stale'.
+  # For example, a value of +35.days+ would treat any row older than 35 days as stale.
+  # @return [ActiveSupport::Duration]
+  def stale_at
+    raise NotImplementedError
+  end
+
+  # How far from +stale_at+ to "look back" for stale rows.
+  # We cannot just use "row_updated_at < stale_at" since this would scan ALL the rows
+  # every time we delete rows. Instead, we only want to scan rows where
+  # "row_updated_at < stale_at AND row_updated_at > (stale_at - lookback_window)".
+  # For example, a +stale_at+ of 20 days and a +lookback_window+ of 7 days
+  # would look to delete rows 20 to 27 days old.
+  #
+  # If the stale row deleter is run daily, a good lookback window would be 2-3 days,
+  # since as long as the job is running we shouldn't find rows that aren't cleaned up.
+  #
+  # Use +run_initial+ to do a full table scan,
+  # which may be necessary when running this feature for a table for the first time.
+  # @return [ActiveSupport::Duration]
+  def lookback_window
+    raise NotImplementedError
+  end
+
+  # Name of the column, like +:row_updated_at+.
+  # @return [Symbol]
+  def updated_at_column
+    raise NotImplementedError
+  end
+
+  # Other additional 'stale' conditions, like {status: 'cancelled'}
+  # @return [Hash]
+  def stale_condition
+    raise NotImplementedError
+  end
+
+  # The row delete is done in chunks to avoid long locks.
+  # The default seems safe, but it's exposed if you need to play around with it,
+  # and can be done via configuration if needed at some point.
+  # @return [Integer]
+  def chunk_size = 10_000
+
+  # How small should the incremental lookback window be? See +run+ for details.
+  # A size of 1 hour, and a lookback window of 2 days, would yield at least 48 delete queries.
+  def incremental_lookback_size = 1.hour
+
+  # Run the deleter.
+  # @param lookback_window [nil,ActiveSupport::Duration] The lookback window
+  #   (how many days before +stale_cutoff+ to look for rows). Use +nil+ to look for all rows.
+  def run(lookback_window: self.lookback_window)
+    # The algorithm to delete stale rows is complex for a couple of reasons.
+    # The naive solution is "delete rows where updated_at > (stale_at - lookback_window) AND updated_at < stale_at"
+    # However, this would cause a single massive query over the entire candidate row space,
+    # which has problems:
+    # - The query can be very slow
+    # - Deadlocks can happen due to the slow query.
+    # - If the query is interrupted (due to a worker restart), all progress is lost.
+    # - Scanning the large 'updated at timestamp' index can cause the database to do a sequential scan.
+    #
+    # Instead, we need to issue a series of fast queries over small 'updated at' windows:
+    #
+    # - Break the lookback period into hour-long windows.
+    #   If the lookback_window is 2 days, this would issue 48 queries.
+    #   But each one would be very fast, since the column is indexed.
+    # - For each small window, delete in chunks, like:
+    #     DELETE from "public"."icalendar_event_v1_aaaa"
+    #     WHERE pk IN (
+    #       SELECT pk FROM "public"."icalendar_event_v1_aaaa"
+    #       WHERE row_updated_at >= (hour start)
+    #       AND row_updated_at < (hour end)
+    #       LIMIT (chunk size)
+    #     )
+    # - Issue each DELETE within a transaction with seqscan disabled.
+    #   This is crude, but we know for our usage case that we never want a seqscan.
+    # - Using the chunked delete with the hour-long (small-sized) windows
+    #   is important. Because each chunk requires scanning potentially the entire indexed row space,
+    #   it would take longer and longer to find 10k rows to fill the chunk.
+    #   This is, for example, the same performance problem that OFFSET/LIMIT pagination
+    #   has at later pages (but not earlier pages).
+    self.replicator.admin_dataset do |ds|
+      stale_window_late = Time.now - self.stale_at
+      stale_window_early = lookback_window.nil? ? ds.min(self.updated_at_column) : stale_window_late - lookback_window
+      # If we are querying the whole table (no lookback window), and have no rows,
+      # there's nothing to clean up.
+      break if stale_window_early.nil?
+
+      # We must disable vacuuming for this sort of cleanup.
+      # Otherwise, it will take a LONG time since we use a series of short deletes.
+      self.set_autovacuum(ds.db, false)
+      if self.replicator.partition?
+        # If the replicator is partitioned, we need to delete stale rows on each partition separately.
+        # We DELETE with a LIMIT in chunks, but when we run this on the main table, it'll run the query
+        # on every partition BEFORE applying the limit. You'll see this manifest with speed,
+        # but also the planner using a sequential scan for the delete, rather than hitting an index.
+        # Instead, DELETE from each partition in chunks, which will use the indices, and apply the limit properly.
+        self.replicator.existing_partitions(ds.db).each do |p|
+          pdb = ds.db[self.replicator.qualified_table_sequel_identifier(table: p.partition_name)]
+          self._run_delete(pdb, stale_window_early:, stale_window_late:)
+        end
+      else
+        self._run_delete(ds, stale_window_early:, stale_window_late:)
+      end
+    end
+  ensure
+    # Open a new connection in case the previous one is trashed for whatever reason.
+    self.replicator.admin_dataset do |ds|
+      self.set_autovacuum(ds.db, true)
+    end
+  end
+
+  def _run_delete(ds, stale_window_early:, stale_window_late:)
+    base_ds = ds.where(self.stale_condition).limit(self.chunk_size).select(:pk)
+    window_start = stale_window_early
+    until window_start >= stale_window_late
+      window_end = window_start + self.incremental_lookback_size
+      inner_ds = base_ds.where(self.updated_at_column => window_start..window_end)
+      loop do
+        # Due to conflicts where a feed is being inserted while the delete is happening,
+        # this may raise an error like:
+        #   deadlock detected
+        #   DETAIL: Process 18352 waits for ShareLock on transaction 435085606; blocked by process 24191.
+        #   Process 24191 waits for ShareLock on transaction 435085589; blocked by process 18352.
+        #   HINT: See server log for query details.
+        #   CONTEXT: while deleting tuple (2119119,3) in relation "icalendar_event_v1_aaaa"
+        # So we don't explicitly handle deadlocks, but could if it becomes an issue.
+        delete_ds = ds.where(pk: inner_ds)
+        # Disable seqscan for the delete. We can end up with seqscans if the planner decides
+        # it's a better choice given the 'updated at' index, but for our purposes we know
+        # we never want to use it (the impact is negligible on small tables,
+        # and catastrophic on large tables).
+        sql_lines = [
+          "BEGIN",
+          "SET LOCAL enable_seqscan='off'",
+          delete_ds.delete_sql,
+          "COMMIT",
+        ]
+        deleted = ds.db << sql_lines.join(";\n")
+        break if deleted != self.chunk_size
+      end
+      window_start = window_end
+    end
+  end
+
+  def set_autovacuum(db, on)
+    return if self.replicator.partition?
+    arg = on ? "on" : "off"
+    db << "ALTER TABLE #{self.replicator.schema_and_table_symbols.join('.')} SET (autovacuum_enabled='#{arg}')"
+  end
+
+  # Run with +lookback_window+ as +nil+, which does a full table scan.
+  def run_initial = self.run(lookback_window: nil)
+end
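A hypothetical concrete deleter, to show the shape of a subclass; the durations and condition are illustrative, not the gem's values (see icalendar_calendar_v1.rb and jobs/stale_row_deleter.rb in the file list for the real usage):

    class CancelledEventStaleRowDeleter < Webhookdb::Replicator::BaseStaleRowDeleter
      def stale_at = 35.days
      def lookback_window = 3.days
      def updated_at_column = :row_updated_at
      def stale_condition = {status: "CANCELLED"}
    end

    # First run against an existing table: full scan.
    CancelledEventStaleRowDeleter.new(replicator).run_initial
    # Thereafter, on a schedule (for example, daily):
    CancelledEventStaleRowDeleter.new(replicator).run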
@@ -349,6 +349,8 @@ class Webhookdb::Replicator::Column

   # If provided, use this expression as the UPDATE value when adding the column
   # to an existing table.
+  # To explicitly backfill using NULL, use the value +Sequel[nil]+
+  # rather than +nil+.
   # @return [String,Sequel,Sequel::SQL::Expression]
   attr_reader :backfill_expr

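An illustrative column spec using the documented NULL backfill behavior (the column name is made up; TIMESTAMP is the column-type constant replicators already use, and the keyword is assumed to mirror the backfill_expr reader above):

    Webhookdb::Replicator::Column.new(
      :cancelled_at,
      TIMESTAMP,
      optional: true,
      # Sequel[nil] backfills existing rows with an explicit NULL;
      # a bare nil would mean 'no backfill expression'.
      backfill_expr: Sequel[nil],
    )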
@@ -29,7 +29,6 @@ class Webhookdb::Replicator::EmailOctopusContactV1 < Webhookdb::Replicator::Base
       :compound_identity,
       TEXT,
       data_key: "<compound key, see converter>",
-      index: true,
       optional: true,
       converter: CONV_REMOTE_KEY,
     )