webhookdb 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/db/migrations/026_undo_integration_backfill_cursor.rb +2 -0
- data/db/migrations/032_remove_db_defaults.rb +2 -0
- data/db/migrations/043_text_search.rb +2 -0
- data/db/migrations/047_sync_parallelism.rb +9 -0
- data/db/migrations/048_sync_stats.rb +9 -0
- data/db/migrations/049_error_handlers.rb +18 -0
- data/db/migrations/050_logged_webhook_indices.rb +25 -0
- data/db/migrations/051_partitioning.rb +9 -0
- data/integration/async_spec.rb +0 -2
- data/integration/service_integrations_spec.rb +0 -2
- data/lib/amigo/durable_job.rb +2 -2
- data/lib/amigo/job_in_context.rb +12 -0
- data/lib/webhookdb/api/entities.rb +6 -2
- data/lib/webhookdb/api/error_handlers.rb +104 -0
- data/lib/webhookdb/api/helpers.rb +8 -1
- data/lib/webhookdb/api/icalproxy.rb +22 -0
- data/lib/webhookdb/api/install.rb +2 -1
- data/lib/webhookdb/api/saved_queries.rb +1 -0
- data/lib/webhookdb/api/saved_views.rb +1 -0
- data/lib/webhookdb/api/service_integrations.rb +1 -1
- data/lib/webhookdb/api/sync_targets.rb +1 -1
- data/lib/webhookdb/api/system.rb +5 -0
- data/lib/webhookdb/api/webhook_subscriptions.rb +1 -0
- data/lib/webhookdb/api.rb +4 -1
- data/lib/webhookdb/apps.rb +4 -0
- data/lib/webhookdb/async/autoscaler.rb +10 -0
- data/lib/webhookdb/async/job.rb +4 -0
- data/lib/webhookdb/async/scheduled_job.rb +4 -0
- data/lib/webhookdb/async.rb +2 -0
- data/lib/webhookdb/backfiller.rb +17 -4
- data/lib/webhookdb/concurrent.rb +96 -0
- data/lib/webhookdb/connection_cache.rb +29 -8
- data/lib/webhookdb/customer.rb +2 -2
- data/lib/webhookdb/database_document.rb +1 -1
- data/lib/webhookdb/db_adapter/default_sql.rb +1 -14
- data/lib/webhookdb/db_adapter/partition.rb +14 -0
- data/lib/webhookdb/db_adapter/partitioning.rb +8 -0
- data/lib/webhookdb/db_adapter/pg.rb +77 -5
- data/lib/webhookdb/db_adapter/snowflake.rb +15 -6
- data/lib/webhookdb/db_adapter.rb +24 -2
- data/lib/webhookdb/fixtures/logged_webhooks.rb +4 -0
- data/lib/webhookdb/fixtures/organization_error_handlers.rb +20 -0
- data/lib/webhookdb/http.rb +29 -15
- data/lib/webhookdb/icalendar.rb +30 -9
- data/lib/webhookdb/jobs/amigo_test_jobs.rb +1 -1
- data/lib/webhookdb/jobs/backfill.rb +21 -25
- data/lib/webhookdb/jobs/create_mirror_table.rb +3 -4
- data/lib/webhookdb/jobs/deprecated_jobs.rb +2 -0
- data/lib/webhookdb/jobs/emailer.rb +2 -1
- data/lib/webhookdb/jobs/front_signalwire_message_channel_sync_inbound.rb +15 -0
- data/lib/webhookdb/jobs/icalendar_delete_stale_cancelled_events.rb +7 -2
- data/lib/webhookdb/jobs/icalendar_enqueue_syncs.rb +74 -11
- data/lib/webhookdb/jobs/icalendar_enqueue_syncs_for_urls.rb +22 -0
- data/lib/webhookdb/jobs/icalendar_sync.rb +21 -9
- data/lib/webhookdb/jobs/increase_event_handler.rb +3 -2
- data/lib/webhookdb/jobs/logged_webhooks_replay.rb +5 -3
- data/lib/webhookdb/jobs/message_dispatched.rb +1 -0
- data/lib/webhookdb/jobs/model_event_system_log_tracker.rb +7 -0
- data/lib/webhookdb/jobs/monitor_metrics.rb +1 -1
- data/lib/webhookdb/jobs/organization_database_migration_notify.rb +32 -0
- data/lib/webhookdb/jobs/organization_database_migration_run.rb +4 -6
- data/lib/webhookdb/jobs/organization_error_handler_dispatch.rb +26 -0
- data/lib/webhookdb/jobs/prepare_database_connections.rb +1 -0
- data/lib/webhookdb/jobs/process_webhook.rb +11 -12
- data/lib/webhookdb/jobs/renew_watch_channel.rb +7 -10
- data/lib/webhookdb/jobs/replication_migration.rb +5 -2
- data/lib/webhookdb/jobs/reset_code_create_dispatch.rb +1 -2
- data/lib/webhookdb/jobs/scheduled_backfills.rb +2 -2
- data/lib/webhookdb/jobs/send_invite.rb +3 -2
- data/lib/webhookdb/jobs/send_test_webhook.rb +1 -3
- data/lib/webhookdb/jobs/send_webhook.rb +4 -5
- data/lib/webhookdb/jobs/stale_row_deleter.rb +31 -0
- data/lib/webhookdb/jobs/sync_target_enqueue_scheduled.rb +3 -0
- data/lib/webhookdb/jobs/sync_target_run_sync.rb +9 -15
- data/lib/webhookdb/jobs/webhook_subscription_delivery_event.rb +5 -8
- data/lib/webhookdb/liquid/expose.rb +1 -1
- data/lib/webhookdb/liquid/filters.rb +1 -1
- data/lib/webhookdb/liquid/partial.rb +2 -2
- data/lib/webhookdb/logged_webhook/resilient.rb +3 -3
- data/lib/webhookdb/logged_webhook.rb +16 -2
- data/lib/webhookdb/message/email_transport.rb +1 -1
- data/lib/webhookdb/message.rb +2 -2
- data/lib/webhookdb/messages/error_generic_backfill.rb +2 -0
- data/lib/webhookdb/messages/error_icalendar_fetch.rb +2 -0
- data/lib/webhookdb/messages/error_signalwire_send_sms.rb +2 -0
- data/lib/webhookdb/organization/alerting.rb +50 -4
- data/lib/webhookdb/organization/database_migration.rb +1 -1
- data/lib/webhookdb/organization/db_builder.rb +4 -3
- data/lib/webhookdb/organization/error_handler.rb +141 -0
- data/lib/webhookdb/organization.rb +62 -9
- data/lib/webhookdb/postgres/model_utilities.rb +2 -0
- data/lib/webhookdb/postgres.rb +1 -3
- data/lib/webhookdb/replicator/base.rb +136 -29
- data/lib/webhookdb/replicator/base_stale_row_deleter.rb +165 -0
- data/lib/webhookdb/replicator/email_octopus_contact_v1.rb +0 -1
- data/lib/webhookdb/replicator/fake.rb +100 -88
- data/lib/webhookdb/replicator/front_signalwire_message_channel_app_v1.rb +105 -44
- data/lib/webhookdb/replicator/github_repo_v1_mixin.rb +17 -0
- data/lib/webhookdb/replicator/icalendar_calendar_v1.rb +144 -23
- data/lib/webhookdb/replicator/icalendar_event_v1.rb +20 -44
- data/lib/webhookdb/replicator/icalendar_event_v1_partitioned.rb +33 -0
- data/lib/webhookdb/replicator/intercom_contact_v1.rb +1 -0
- data/lib/webhookdb/replicator/intercom_conversation_v1.rb +1 -0
- data/lib/webhookdb/replicator/intercom_v1_mixin.rb +24 -2
- data/lib/webhookdb/replicator/partitionable_mixin.rb +116 -0
- data/lib/webhookdb/replicator/shopify_v1_mixin.rb +1 -1
- data/lib/webhookdb/replicator/signalwire_message_v1.rb +1 -2
- data/lib/webhookdb/replicator/sponsy_v1_mixin.rb +1 -1
- data/lib/webhookdb/replicator/transistor_episode_stats_v1.rb +0 -1
- data/lib/webhookdb/replicator.rb +4 -1
- data/lib/webhookdb/service/helpers.rb +4 -0
- data/lib/webhookdb/service/middleware.rb +6 -2
- data/lib/webhookdb/service_integration.rb +5 -0
- data/lib/webhookdb/signalwire.rb +1 -1
- data/lib/webhookdb/spec_helpers/async.rb +0 -4
- data/lib/webhookdb/spec_helpers/sentry.rb +32 -0
- data/lib/webhookdb/spec_helpers/shared_examples_for_replicators.rb +87 -1
- data/lib/webhookdb/spec_helpers.rb +1 -0
- data/lib/webhookdb/sync_target.rb +195 -29
- data/lib/webhookdb/tasks/admin.rb +1 -1
- data/lib/webhookdb/tasks/annotate.rb +1 -1
- data/lib/webhookdb/tasks/db.rb +13 -1
- data/lib/webhookdb/tasks/docs.rb +1 -1
- data/lib/webhookdb/tasks/fixture.rb +1 -1
- data/lib/webhookdb/tasks/message.rb +1 -1
- data/lib/webhookdb/tasks/regress.rb +1 -1
- data/lib/webhookdb/tasks/release.rb +1 -1
- data/lib/webhookdb/tasks/sidekiq.rb +1 -1
- data/lib/webhookdb/tasks/specs.rb +1 -1
- data/lib/webhookdb/version.rb +1 -1
- data/lib/webhookdb/webhook_subscription.rb +2 -3
- data/lib/webhookdb.rb +3 -1
- metadata +88 -54
- data/lib/webhookdb/jobs/organization_database_migration_notify_finished.rb +0 -21
- data/lib/webhookdb/jobs/organization_database_migration_notify_started.rb +0 -21
@@ -62,7 +62,7 @@ class Webhookdb::Replicator::Base
|
|
62
62
|
# and the arguments used to upsert it (arguments to upsert_webhook),
|
63
63
|
# and should return the body string to respond back with.
|
64
64
|
#
|
65
|
-
# @param [Hash] upserted
|
65
|
+
# @param [Hash,Array] upserted
|
66
66
|
# @param [Webhookdb::Replicator::WebhookRequest] request
|
67
67
|
# @return [String]
|
68
68
|
def synchronous_processing_response_body(upserted:, request:)
|
@@ -318,10 +318,12 @@ for information on how to refresh data.)
|
|
318
318
|
# Find a dependent service integration with the given service name.
|
319
319
|
# If none are found, return nil. If multiple are found, raise,
|
320
320
|
# as this should only be used for automatically managed integrations.
|
321
|
+
# @param service_name [String,Array<String>]
|
321
322
|
# @return [Webhookdb::ServiceIntegration,nil]
|
322
323
|
def find_dependent(service_name)
|
323
|
-
|
324
|
-
|
324
|
+
names = service_name.respond_to?(:to_ary) ? service_name : [service_name]
|
325
|
+
sints = self.service_integration.dependents.filter { |si| names.include?(si.service_name) }
|
326
|
+
raise Webhookdb::InvalidPrecondition, "there are multiple #{names.join('/')} integrations in dependents" if
|
325
327
|
sints.length > 1
|
326
328
|
return sints.first
|
327
329
|
end
|
@@ -356,7 +358,9 @@ for information on how to refresh data.)
|
|
356
358
|
columns << self.data_column
|
357
359
|
adapter = Webhookdb::DBAdapter::PG.new
|
358
360
|
result = Webhookdb::Replicator::SchemaModification.new
|
359
|
-
|
361
|
+
create_table = adapter.create_table_sql(table, columns, if_not_exists:, partition: self.partitioning)
|
362
|
+
result.transaction_statements << create_table
|
363
|
+
result.transaction_statements.concat(self.create_table_partitions(adapter))
|
360
364
|
self.indices(table).each do |dbindex|
|
361
365
|
result.transaction_statements << adapter.create_index_sql(dbindex, concurrently: false)
|
362
366
|
end
|
@@ -364,33 +368,69 @@ for information on how to refresh data.)
|
|
364
368
|
return result
|
365
369
|
end
|
366
370
|
|
371
|
+
# True if the replicator uses partitioning.
|
372
|
+
def partition? = false
|
373
|
+
# Non-nil only if +partition?+ is true.
|
374
|
+
# @return [Webhookdb::DBAdapter::Partitioning,nil]
|
375
|
+
def partitioning = nil
|
376
|
+
|
377
|
+
# Return the partitions belonging to the table.
|
378
|
+
# Return an empty array if this replicator is not partitioned.
|
379
|
+
# @return [Array<Webhookdb::DBAdapter::Partition>]
|
380
|
+
def existing_partitions(_db)
|
381
|
+
raise NotImplementedError if self.partition?
|
382
|
+
return []
|
383
|
+
end
|
384
|
+
|
385
|
+
def create_table_partitions(adapter)
|
386
|
+
return [] unless self.partition?
|
387
|
+
# We only need create_table partitions when we create the table.
|
388
|
+
# Range partitions would be created on demand, when inserting rows and the partition doesn't exist.
|
389
|
+
return [] unless self.partitioning.by == Webhookdb::DBAdapter::Partitioning::HASH
|
390
|
+
|
391
|
+
max_partition = self.service_integration.partition_value
|
392
|
+
raise Webhookdb::InvalidPrecondition, "partition value must be positive" unless max_partition.positive?
|
393
|
+
stmts = (0...max_partition).map do |i|
|
394
|
+
adapter.create_hash_partition_sql(self.dbadapter_table, max_partition, i)
|
395
|
+
end
|
396
|
+
return stmts
|
397
|
+
end
|
398
|
+
|
367
399
|
# We need to give indices a persistent name, unique across the schema,
|
368
400
|
# since multiple indices within a schema cannot share a name.
|
369
401
|
#
|
370
402
|
# Note that in certain RDBMS (Postgres) index names cannot exceed a certain length;
|
371
403
|
# Postgres will silently truncate them. This can result in an index not being created
|
372
|
-
# if it shares the same name as another index and we use 'CREATE INDEX IF NOT EXISTS.'
|
404
|
+
# if it shares the same name as another index, and we use 'CREATE INDEX IF NOT EXISTS.'
|
373
405
|
#
|
374
406
|
# To avoid this, if the generated name exceeds a certain size, an md5 hash of the column names is used.
|
375
407
|
#
|
376
408
|
# @param columns [Array<Webhookdb::DBAdapter::Column, Webhookdb::Replicator::Column>] Must respond to :name.
|
409
|
+
# @param identifier [String,nil] Use this instead of a combination of column names.
|
410
|
+
# Only use this where multiple indices are needed for the same columns, but something like the 'where'
|
411
|
+
# condition is different.
|
377
412
|
# @return [String]
|
378
|
-
protected def index_name(columns)
|
413
|
+
protected def index_name(columns, identifier: nil)
|
379
414
|
raise Webhookdb::InvalidPrecondition, "sint needs an opaque id" if self.service_integration.opaque_id.blank?
|
380
415
|
colnames = columns.map(&:name).join("_")
|
381
416
|
opaque_id = self.service_integration.opaque_id
|
382
417
|
# Handle old IDs without the leading 'svi_'.
|
383
418
|
opaque_id = "idx#{opaque_id}" if /\d/.match?(opaque_id[0])
|
384
|
-
|
385
|
-
if
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
419
|
+
|
420
|
+
if identifier
|
421
|
+
name = "#{opaque_id}_#{identifier}_idx"
|
422
|
+
else
|
423
|
+
name = "#{opaque_id}_#{colnames}_idx"
|
424
|
+
if name.size > MAX_INDEX_NAME_LENGTH
|
425
|
+
# We don't have the 32 extra chars for a full md5 hash.
|
426
|
+
# We can't convert to Base64 or whatever, since we don't want to depend on case sensitivity.
|
427
|
+
# So just lop off a few characters (normally 2) from the end of the md5.
|
428
|
+
# The collision space is so small (some combination of column names would need to have the
|
429
|
+
# same md5, which is unfathomable), we're not really worried about it.
|
430
|
+
colnames_md5 = Digest::MD5.hexdigest(colnames)
|
431
|
+
available_chars = MAX_INDEX_NAME_LENGTH - "#{opaque_id}__idx".size
|
432
|
+
name = "#{opaque_id}_#{colnames_md5[...available_chars]}_idx"
|
433
|
+
end
|
394
434
|
end
|
395
435
|
raise Webhookdb::InvariantViolation, "index names cannot exceed 63 chars, got #{name.size} in '#{name}'" if
|
396
436
|
name.size > 63
|
@@ -406,7 +446,12 @@ for information on how to refresh data.)
|
|
406
446
|
|
407
447
|
# @return [Webhookdb::DBAdapter::Column]
|
408
448
|
def remote_key_column
|
409
|
-
|
449
|
+
c = self._remote_key_column
|
450
|
+
if c.index?
|
451
|
+
msg = "_remote_key_column index:true should not be set, since it automatically gets a unique index"
|
452
|
+
Kernel.warn msg
|
453
|
+
end
|
454
|
+
return c.to_dbadapter(unique: true, nullable: false, index: false)
|
410
455
|
end
|
411
456
|
|
412
457
|
# @return [Webhookdb::DBAdapter::Column]
|
@@ -465,6 +510,9 @@ for information on how to refresh data.)
|
|
465
510
|
# Each integration needs a single remote key, like the Shopify order id for shopify orders,
|
466
511
|
# or sid for Twilio resources. This column must be unique for the table, like a primary key.
|
467
512
|
#
|
513
|
+
# NOTE: Do not set index:true. The remote key column always must be unique,
|
514
|
+
# so it gets a unique index automatically.
|
515
|
+
#
|
468
516
|
# @abstract
|
469
517
|
# @return [Webhookdb::Replicator::Column]
|
470
518
|
def _remote_key_column
|
@@ -495,9 +543,16 @@ for information on how to refresh data.)
|
|
495
543
|
end
|
496
544
|
self._extra_index_specs.each do |spec|
|
497
545
|
targets = spec.columns.map { |n| dba_cols_by_name.fetch(n) }
|
498
|
-
idx_name = self.index_name(targets)
|
546
|
+
idx_name = self.index_name(targets, identifier: spec.identifier)
|
499
547
|
result << Webhookdb::DBAdapter::Index.new(name: idx_name.to_sym, table:, targets:, where: spec.where)
|
500
548
|
end
|
549
|
+
index_names = result.map(&:name)
|
550
|
+
if (dupes = index_names.find_all.with_index { |n, idx| idx != index_names.rindex(n) }).any?
|
551
|
+
msg = "Duplicate index names detected. Use the 'name' attribute to differentiate: " +
|
552
|
+
dupes.map(&:to_s).join(", ")
|
553
|
+
raise Webhookdb::Replicator::BrokenSpecification, msg
|
554
|
+
end
|
555
|
+
|
501
556
|
return result
|
502
557
|
end
|
503
558
|
|
@@ -520,7 +575,7 @@ for information on how to refresh data.)
|
|
520
575
|
|
521
576
|
# @return [Webhookdb::Replicator::SchemaModification]
|
522
577
|
def ensure_all_columns_modification
|
523
|
-
existing_cols, existing_indices = nil
|
578
|
+
existing_cols, existing_indices, existing_partitions = nil
|
524
579
|
max_pk = 0
|
525
580
|
sint = self.service_integration
|
526
581
|
self.admin_dataset do |ds|
|
@@ -531,6 +586,7 @@ for information on how to refresh data.)
|
|
531
586
|
tablename: sint.table_name,
|
532
587
|
).select_map(:indexname).to_set
|
533
588
|
max_pk = ds.max(:pk) || 0
|
589
|
+
existing_partitions = self.existing_partitions(ds.db)
|
534
590
|
end
|
535
591
|
adapter = Webhookdb::DBAdapter::PG.new
|
536
592
|
table = self.dbadapter_table
|
@@ -577,7 +633,9 @@ for information on how to refresh data.)
|
|
577
633
|
# Add missing indices. This should happen AFTER the UPDATE calls so the UPDATEs don't have to update indices.
|
578
634
|
self.indices(table).map do |index|
|
579
635
|
next if existing_indices.include?(index.name.to_s)
|
580
|
-
result.nontransaction_statements
|
636
|
+
result.nontransaction_statements.concat(
|
637
|
+
adapter.create_index_sqls(index, concurrently: true, partitions: existing_partitions),
|
638
|
+
)
|
581
639
|
end
|
582
640
|
|
583
641
|
result.application_database_statements << sint.ensure_sequence_sql if self.requires_sequence?
|
@@ -641,6 +699,7 @@ for information on how to refresh data.)
|
|
641
699
|
# like when we have to take different action based on a request method.
|
642
700
|
#
|
643
701
|
# @param body [Hash]
|
702
|
+
# @return [Array,Hash] Inserted rows, or array of inserted rows if many.
|
644
703
|
def upsert_webhook_body(body, **kw)
|
645
704
|
return self.upsert_webhook(Webhookdb::Replicator::WebhookRequest.new(body:), **kw)
|
646
705
|
end
|
@@ -649,13 +708,14 @@ for information on how to refresh data.)
|
|
649
708
|
# NOT a Rack::Request.
|
650
709
|
#
|
651
710
|
# @param [Webhookdb::Replicator::WebhookRequest] request
|
711
|
+
# @return [Array,Hash] Inserted rows, or array of inserted rows if many.
|
652
712
|
def upsert_webhook(request, **kw)
|
653
713
|
return self._upsert_webhook(request, **kw)
|
654
714
|
rescue Amigo::Retry::Error
|
655
715
|
# Do not log this since it's expected/handled by Amigo
|
656
716
|
raise
|
657
717
|
rescue StandardError => e
|
658
|
-
self.logger.error("upsert_webhook_error", request: request.as_json,
|
718
|
+
self.logger.error("upsert_webhook_error", {request: request.as_json}, e)
|
659
719
|
raise
|
660
720
|
end
|
661
721
|
|
@@ -664,9 +724,23 @@ for information on how to refresh data.)
|
|
664
724
|
#
|
665
725
|
# @param request [Webhookdb::Replicator::WebhookRequest]
|
666
726
|
# @param upsert [Boolean] If false, just return what would be upserted.
|
727
|
+
# @return [Array,Hash] Inserted rows, or array of inserted rows if many.
|
667
728
|
def _upsert_webhook(request, upsert: true)
|
668
|
-
|
669
|
-
return nil if
|
729
|
+
resource_or_list, event = self._resource_and_event(request)
|
730
|
+
return nil if resource_or_list.nil?
|
731
|
+
if resource_or_list.is_a?(Array)
|
732
|
+
unless event.nil?
|
733
|
+
msg = "resource_and_event cannot return an array of resources with a non-nil event"
|
734
|
+
raise Webhookdb::InvalidPostcondition, msg
|
735
|
+
end
|
736
|
+
return resource_or_list.map do |resource|
|
737
|
+
self._upsert_webhook_single_resource(request, resource:, event:, upsert:)
|
738
|
+
end
|
739
|
+
end
|
740
|
+
return self._upsert_webhook_single_resource(request, resource: resource_or_list, event:, upsert:)
|
741
|
+
end
|
742
|
+
|
743
|
+
def _upsert_webhook_single_resource(request, resource:, event:, upsert:)
|
670
744
|
enrichment = self._fetch_enrichment(resource, event, request)
|
671
745
|
prepared = self._prepare_for_insert(resource, event, request, enrichment)
|
672
746
|
raise Webhookdb::InvalidPostcondition if prepared.key?(:data)
|
@@ -676,12 +750,11 @@ for information on how to refresh data.)
|
|
676
750
|
inserting[:enrichment] = self._to_json(enrichment) if self._store_enrichment_body?
|
677
751
|
inserting.merge!(prepared)
|
678
752
|
return inserting unless upsert
|
679
|
-
remote_key_col = self._remote_key_column
|
680
753
|
updating = self._upsert_update_expr(inserting, enrichment:)
|
681
754
|
update_where = self._update_where_expr
|
682
755
|
upserted_rows = self.admin_dataset(timeout: :fast) do |ds|
|
683
756
|
ds.insert_conflict(
|
684
|
-
target:
|
757
|
+
target: self._upsert_conflict_target,
|
685
758
|
update: updating,
|
686
759
|
update_where:,
|
687
760
|
).insert(inserting)
|
@@ -692,6 +765,12 @@ for information on how to refresh data.)
|
|
692
765
|
return inserting
|
693
766
|
end
|
694
767
|
|
768
|
+
# The target for ON CONFLICT. Usually the remote key column name,
|
769
|
+
# except if the remote id is a compound unique index, like for partitioned tables.
|
770
|
+
# Can be a symbol, array of symbols representing the column names, a +Sequel.lit+, etc.
|
771
|
+
# See +Sequel::Dataset.insert_conflict+ :target option for details.
|
772
|
+
def _upsert_conflict_target = self._remote_key_column.name
|
773
|
+
|
695
774
|
# The NULL ASCII character (\u0000), when present in a string ("\u0000"),
|
696
775
|
# and then encoded into JSON ("\\u0000") is invalid in PG JSONB- its strings cannot contain NULLs
|
697
776
|
# (note that JSONB does not store the encoded string verbatim, it parses it into PG types, and a PG string
|
@@ -796,7 +875,7 @@ for information on how to refresh data.)
|
|
796
875
|
#
|
797
876
|
# @abstract
|
798
877
|
# @param [Webhookdb::Replicator::WebhookRequest] request
|
799
|
-
# @return [Array<Hash>,nil]
|
878
|
+
# @return [Array<Hash,Array>,nil]
|
800
879
|
def _resource_and_event(request)
|
801
880
|
raise NotImplementedError
|
802
881
|
end
|
@@ -906,10 +985,10 @@ for information on how to refresh data.)
|
|
906
985
|
# - The table OID for this replicator
|
907
986
|
# - The given key
|
908
987
|
#
|
909
|
-
# Note this
|
988
|
+
# Note this establishes a new DB connection for the advisory lock;
|
910
989
|
# we have had issues with advisory locks on reused connections,
|
911
990
|
# and this is safer than having a lock that is never released.
|
912
|
-
|
991
|
+
def with_advisory_lock(key, &)
|
913
992
|
url = self.service_integration.organization.admin_connection_url_raw
|
914
993
|
got = nil
|
915
994
|
Webhookdb::Dbutil.borrow_conn(url) do |conn|
|
@@ -969,7 +1048,7 @@ for information on how to refresh data.)
|
|
969
1048
|
rescue TypeError, NoMethodError => e
|
970
1049
|
# if we don't incur an HTTP error, but do incur an Error due to differences in the shapes of anticipated
|
971
1050
|
# response data in the `fetch_backfill_page` function, we can assume that the credentials are okay
|
972
|
-
self.logger.info "verify_backfill_credentials_expected_failure",
|
1051
|
+
self.logger.info "verify_backfill_credentials_expected_failure", e
|
973
1052
|
return CredentialVerificationResult.new(verified: true, message: "")
|
974
1053
|
end
|
975
1054
|
return CredentialVerificationResult.new(verified: true, message: "")
|
@@ -1190,6 +1269,34 @@ or leave blank to choose the first option.
|
|
1190
1269
|
return self._webhook_endpoint
|
1191
1270
|
end
|
1192
1271
|
|
1272
|
+
# Avoid writes under the following conditions:
|
1273
|
+
#
|
1274
|
+
# - A table lock is taken on the table
|
1275
|
+
# - A vacuum is in progress on the table
|
1276
|
+
#
|
1277
|
+
# Of course, in most situations we want to write anyway,
|
1278
|
+
# but there are some cases (lower-priority replicators for example)
|
1279
|
+
# where we can reschedule the job to happen in the future instead.
|
1280
|
+
def avoid_writes?
|
1281
|
+
# We will need to handle this differently when not under Postgres, but for now,
|
1282
|
+
# just assume Postgres.
|
1283
|
+
# Find the admin URL for the organization's server (NOT the organization admin url, it can't see system processes).
|
1284
|
+
# Then check for 1) vacuums in progress, 2) locks.
|
1285
|
+
self.service_integration.organization.readonly_connection do |db|
|
1286
|
+
count = db[:pg_locks].
|
1287
|
+
join(:pg_class, {oid: :relation}).
|
1288
|
+
join(:pg_namespace, {oid: :relnamespace}).
|
1289
|
+
where(
|
1290
|
+
locktype: "relation",
|
1291
|
+
nspname: self.service_integration.organization.replication_schema,
|
1292
|
+
relname: self.service_integration.table_name,
|
1293
|
+
mode: ["ShareUpdateExclusiveLock", "ExclusiveLock", "AccessExclusiveLock"],
|
1294
|
+
).limit(1).count
|
1295
|
+
return true if count&.positive?
|
1296
|
+
end
|
1297
|
+
return false
|
1298
|
+
end
|
1299
|
+
|
1193
1300
|
protected def _webhook_endpoint
|
1194
1301
|
return self.service_integration.unauthed_webhook_endpoint
|
1195
1302
|
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Delete stale rows (like cancelled calendar events) not updated (row_updated_at or whatever column)
|
4
|
+
# in the window between +stale_at+ back to +lookback_window+.
|
5
|
+
# This avoids endlessly adding to a table where we expect rows to become stale over time.
|
6
|
+
class Webhookdb::Replicator::BaseStaleRowDeleter
|
7
|
+
# @return [Webhookdb::Replicator::Base]
|
8
|
+
attr_reader :replicator
|
9
|
+
|
10
|
+
def initialize(replicator)
|
11
|
+
@replicator = replicator
|
12
|
+
end
|
13
|
+
|
14
|
+
# When a row is considered 'stale'.
|
15
|
+
# For example, a value of +35.days+ would treat any row older than 35 days as stale.
|
16
|
+
# @return [ActiveSupport::Duration]
|
17
|
+
def stale_at
|
18
|
+
raise NotImplementedError
|
19
|
+
end
|
20
|
+
|
21
|
+
# How far from +stale_at+ to "look back" for stale rows.
|
22
|
+
# We cannot just use "row_updated_at < stale_at" since this would scan ALL the rows
|
23
|
+
# every time we delete rows. Instead, we only want to scan rows where
|
24
|
+
# "row_updated_at < stale_at AND row_updated_at > (stale_at - lookback_window)".
|
25
|
+
# For example, a +stale_at+ of 20 days and a +lookback_window+ of 7 days
|
26
|
+
# would look to delete rows 20 to 27 days old.
|
27
|
+
#
|
28
|
+
# If the stale row deleter is run daily, a good lookback window would be 2-3 days,
|
29
|
+
# since as long as the job is running we shouldn't find rows that aren't cleaned up.
|
30
|
+
#
|
31
|
+
# Use +run_initial+ to do a full table scan,
|
32
|
+
# which may be necessary when running this feature for a table for the first time.
|
33
|
+
# @return [ActiveSupport::Duration]
|
34
|
+
def lookback_window
|
35
|
+
raise NotImplementedError
|
36
|
+
end
|
37
|
+
|
38
|
+
# Name of the column, like +:row_updated_at+.
|
39
|
+
# @return [Symbol]
|
40
|
+
def updated_at_column
|
41
|
+
raise NotImplementedError
|
42
|
+
end
|
43
|
+
|
44
|
+
# Other additional 'stale' conditions, like {status: 'cancelled'}
|
45
|
+
# @return [Hash]
|
46
|
+
def stale_condition
|
47
|
+
raise NotImplementedError
|
48
|
+
end
|
49
|
+
|
50
|
+
# The row delete is done in chunks to avoid long locks.
|
51
|
+
# The default seems safe, but it's exposed if you need to play around with it,
|
52
|
+
# and can be done via configuration if needed at some point.
|
53
|
+
# @return [Integer]
|
54
|
+
def chunk_size = 10_000
|
55
|
+
|
56
|
+
# How small should the incremental lookback window be? See +run+ for details.
|
57
|
+
# A size of 1 hour, and a lookback window of 2 days, would yield at least 48 delete queries.
|
58
|
+
def incremental_lookback_size = 1.hour
|
59
|
+
|
60
|
+
# Run the deleter.
|
61
|
+
# @param lookback_window [nil,ActiveSupport::Duration] The lookback window
|
62
|
+
# (how far before +stale_at+ to look for rows). Use +nil+ to look for all rows.
|
63
|
+
def run(lookback_window: self.lookback_window)
|
64
|
+
# The algorithm to delete stale rows is complex for a couple of reasons.
|
65
|
+
# The naive solution is "delete rows where updated_at > (stale_at - lookback_window) AND updated_at < stale_at"
|
66
|
+
# However, this would cause a single massive query over the entire candidate row space,
|
67
|
+
# which has problems:
|
68
|
+
# - The query can be very slow
|
69
|
+
# - Deadlocks can happen due to the slow query.
|
70
|
+
# - If the query is interrupted (due to a worker restart), all progress is lost.
|
71
|
+
# - Scanning the large 'updated at timestamp' index can cause the database to do a sequential scan.
|
72
|
+
#
|
73
|
+
# Instead, we need to issue a series of fast queries over small 'updated at' windows:
|
74
|
+
#
|
75
|
+
# - Break the lookback period into hour-long windows.
|
76
|
+
# If the lookback_window is 2 days, this would issue 48 queries.
|
77
|
+
# But each one would be very fast, since the column is indexed.
|
78
|
+
# - For each small window, delete in chunks, like:
|
79
|
+
# DELETE from "public"."icalendar_event_v1_aaaa"
|
80
|
+
# WHERE pk IN (
|
81
|
+
# SELECT pk FROM "public"."icalendar_event_v1_aaaa"
|
82
|
+
# WHERE row_updated_at >= (hour start)
|
83
|
+
# AND row_updated_at < (hour end)
|
84
|
+
# LIMIT (chunk size)
|
85
|
+
# )
|
86
|
+
# - Issue each DELETE within a transaction with seqscan disabled.
|
87
|
+
# This is crude, but we know for our usage case that we never want a seqscan.
|
88
|
+
# - Using the chunked delete with the hour-long (small-sized) windows
|
89
|
+
# is important. Because each chunk requires scanning potentially the entire indexed row space,
|
90
|
+
# it would take longer and longer to find 10k rows to fill the chunk.
|
91
|
+
# This is, for example, the same performance problem that OFFSET/LIMIT pagination
|
92
|
+
# has at later pages (but not earlier pages).
|
93
|
+
self.replicator.admin_dataset do |ds|
|
94
|
+
stale_window_late = Time.now - self.stale_at
|
95
|
+
stale_window_early = lookback_window.nil? ? ds.min(self.updated_at_column) : stale_window_late - lookback_window
|
96
|
+
# If we are querying the whole table (no lookback window), and have no rows,
|
97
|
+
# there's nothing to clean up.
|
98
|
+
break if stale_window_early.nil?
|
99
|
+
|
100
|
+
# We must disable vacuuming for this sort of cleanup.
|
101
|
+
# Otherwise, it will take a LONG time since we use a series of short deletes.
|
102
|
+
self.set_autovacuum(ds.db, false)
|
103
|
+
if self.replicator.partition?
|
104
|
+
# If the replicator is partitioned, we need to delete stale rows on each partition separately.
|
105
|
+
# We DELETE with a LIMIT in chunks, but when we run this on the main table, it'll run the query
|
106
|
+
# on every partition BEFORE applying the limit. You'll see this manifest with speed,
|
107
|
+
# but also the planner using a sequential scan for the delete, rather than hitting an index.
|
108
|
+
# Instead, DELETE from each partition in chunks, which will use the indices, and apply the limit properly.
|
109
|
+
self.replicator.existing_partitions(ds.db).each do |p|
|
110
|
+
pdb = ds.db[self.replicator.qualified_table_sequel_identifier(table: p.partition_name)]
|
111
|
+
self._run_delete(pdb, stale_window_early:, stale_window_late:)
|
112
|
+
end
|
113
|
+
else
|
114
|
+
self._run_delete(ds, stale_window_early:, stale_window_late:)
|
115
|
+
end
|
116
|
+
end
|
117
|
+
ensure
|
118
|
+
# Open a new connection in case the previous one is trashed for whatever reason.
|
119
|
+
self.replicator.admin_dataset do |ds|
|
120
|
+
self.set_autovacuum(ds.db, true)
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
def _run_delete(ds, stale_window_early:, stale_window_late:)
|
125
|
+
base_ds = ds.where(self.stale_condition).limit(self.chunk_size).select(:pk)
|
126
|
+
window_start = stale_window_early
|
127
|
+
until window_start >= stale_window_late
|
128
|
+
window_end = window_start + self.incremental_lookback_size
|
129
|
+
inner_ds = base_ds.where(self.updated_at_column => window_start..window_end)
|
130
|
+
loop do
|
131
|
+
# Due to conflicts where a feed is being inserted while the delete is happening,
|
132
|
+
# this may raise an error like:
|
133
|
+
# deadlock detected
|
134
|
+
# DETAIL: Process 18352 waits for ShareLock on transaction 435085606; blocked by process 24191.
|
135
|
+
# Process 24191 waits for ShareLock on transaction 435085589; blocked by process 18352.
|
136
|
+
# HINT: See server log for query details.
|
137
|
+
# CONTEXT: while deleting tuple (2119119,3) in relation "icalendar_event_v1_aaaa"
|
138
|
+
# So we don't explicitly handle deadlocks, but could if it becomes an issue.
|
139
|
+
delete_ds = ds.where(pk: inner_ds)
|
140
|
+
# Disable seqscan for the delete. We can end up with seqscans if the planner decides
|
141
|
+
# it's a better choice given the 'updated at' index, but for our purposes we know
|
142
|
+
# we never want to use it (the impact is negligible on small tables,
|
143
|
+
# and catastrophic on large tables).
|
144
|
+
sql_lines = [
|
145
|
+
"BEGIN",
|
146
|
+
"SET LOCAL enable_seqscan='off'",
|
147
|
+
delete_ds.delete_sql,
|
148
|
+
"COMMIT",
|
149
|
+
]
|
150
|
+
deleted = ds.db << sql_lines.join(";\n")
|
151
|
+
break if deleted != self.chunk_size
|
152
|
+
end
|
153
|
+
window_start = window_end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
def set_autovacuum(db, on)
|
158
|
+
return if self.replicator.partition?
|
159
|
+
arg = on ? "on" : "off"
|
160
|
+
db << "ALTER TABLE #{self.replicator.schema_and_table_symbols.join('.')} SET (autovacuum_enabled='#{arg}')"
|
161
|
+
end
|
162
|
+
|
163
|
+
# Run with +lookback_window+ as +nil+, which does a full table scan.
|
164
|
+
def run_initial = self.run(lookback_window: nil)
|
165
|
+
end
|