webhookdb 1.3.1 → 1.5.0
This diff shows the changes between two publicly released versions of the package, as published to its public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/admin-dist/assets/{index-6aebf805.js → index-9306dd28.js} +39 -39
- data/admin-dist/index.html +1 -1
- data/data/messages/templates/errors/generic_backfill.email.liquid +30 -0
- data/data/messages/templates/errors/icalendar_fetch.email.liquid +8 -2
- data/data/messages/templates/specs/with_fields.email.liquid +6 -0
- data/db/migrations/026_undo_integration_backfill_cursor.rb +2 -0
- data/db/migrations/032_remove_db_defaults.rb +2 -0
- data/db/migrations/043_text_search.rb +2 -0
- data/db/migrations/045_system_log.rb +15 -0
- data/db/migrations/046_indices.rb +14 -0
- data/db/migrations/047_sync_parallelism.rb +9 -0
- data/db/migrations/048_sync_stats.rb +9 -0
- data/db/migrations/049_error_handlers.rb +18 -0
- data/db/migrations/050_logged_webhook_indices.rb +25 -0
- data/db/migrations/051_partitioning.rb +9 -0
- data/integration/async_spec.rb +0 -2
- data/integration/service_integrations_spec.rb +0 -2
- data/lib/amigo/durable_job.rb +2 -2
- data/lib/amigo/job_in_context.rb +12 -0
- data/lib/webhookdb/admin.rb +6 -0
- data/lib/webhookdb/admin_api/data_provider.rb +1 -0
- data/lib/webhookdb/admin_api/entities.rb +8 -0
- data/lib/webhookdb/aggregate_result.rb +1 -1
- data/lib/webhookdb/api/entities.rb +6 -2
- data/lib/webhookdb/api/error_handlers.rb +104 -0
- data/lib/webhookdb/api/helpers.rb +25 -1
- data/lib/webhookdb/api/icalproxy.rb +22 -0
- data/lib/webhookdb/api/install.rb +2 -1
- data/lib/webhookdb/api/organizations.rb +6 -0
- data/lib/webhookdb/api/saved_queries.rb +1 -0
- data/lib/webhookdb/api/saved_views.rb +1 -0
- data/lib/webhookdb/api/service_integrations.rb +2 -1
- data/lib/webhookdb/api/sync_targets.rb +1 -1
- data/lib/webhookdb/api/system.rb +5 -0
- data/lib/webhookdb/api/webhook_subscriptions.rb +1 -0
- data/lib/webhookdb/api.rb +4 -1
- data/lib/webhookdb/apps.rb +4 -0
- data/lib/webhookdb/async/autoscaler.rb +10 -0
- data/lib/webhookdb/async/job.rb +4 -0
- data/lib/webhookdb/async/scheduled_job.rb +4 -0
- data/lib/webhookdb/async.rb +2 -0
- data/lib/webhookdb/backfiller.rb +17 -4
- data/lib/webhookdb/concurrent.rb +96 -0
- data/lib/webhookdb/connection_cache.rb +57 -10
- data/lib/webhookdb/console.rb +1 -1
- data/lib/webhookdb/customer/reset_code.rb +1 -1
- data/lib/webhookdb/customer.rb +5 -4
- data/lib/webhookdb/database_document.rb +1 -1
- data/lib/webhookdb/db_adapter/default_sql.rb +1 -14
- data/lib/webhookdb/db_adapter/partition.rb +14 -0
- data/lib/webhookdb/db_adapter/partitioning.rb +8 -0
- data/lib/webhookdb/db_adapter/pg.rb +77 -5
- data/lib/webhookdb/db_adapter/snowflake.rb +15 -6
- data/lib/webhookdb/db_adapter.rb +25 -3
- data/lib/webhookdb/dbutil.rb +2 -0
- data/lib/webhookdb/errors.rb +34 -0
- data/lib/webhookdb/fixtures/logged_webhooks.rb +4 -0
- data/lib/webhookdb/fixtures/organization_error_handlers.rb +20 -0
- data/lib/webhookdb/http.rb +30 -16
- data/lib/webhookdb/icalendar.rb +30 -9
- data/lib/webhookdb/jobs/amigo_test_jobs.rb +1 -1
- data/lib/webhookdb/jobs/backfill.rb +21 -25
- data/lib/webhookdb/jobs/create_mirror_table.rb +3 -4
- data/lib/webhookdb/jobs/deprecated_jobs.rb +3 -0
- data/lib/webhookdb/jobs/emailer.rb +2 -1
- data/lib/webhookdb/jobs/front_signalwire_message_channel_sync_inbound.rb +15 -0
- data/lib/webhookdb/jobs/icalendar_delete_stale_cancelled_events.rb +7 -2
- data/lib/webhookdb/jobs/icalendar_enqueue_syncs.rb +74 -11
- data/lib/webhookdb/jobs/icalendar_enqueue_syncs_for_urls.rb +22 -0
- data/lib/webhookdb/jobs/icalendar_sync.rb +21 -9
- data/lib/webhookdb/jobs/increase_event_handler.rb +3 -2
- data/lib/webhookdb/jobs/{logged_webhook_replay.rb → logged_webhooks_replay.rb} +5 -3
- data/lib/webhookdb/jobs/message_dispatched.rb +1 -0
- data/lib/webhookdb/jobs/model_event_system_log_tracker.rb +112 -0
- data/lib/webhookdb/jobs/monitor_metrics.rb +29 -0
- data/lib/webhookdb/jobs/organization_database_migration_notify.rb +32 -0
- data/lib/webhookdb/jobs/organization_database_migration_run.rb +4 -6
- data/lib/webhookdb/jobs/organization_error_handler_dispatch.rb +26 -0
- data/lib/webhookdb/jobs/prepare_database_connections.rb +1 -0
- data/lib/webhookdb/jobs/process_webhook.rb +11 -12
- data/lib/webhookdb/jobs/renew_watch_channel.rb +10 -10
- data/lib/webhookdb/jobs/replication_migration.rb +5 -2
- data/lib/webhookdb/jobs/reset_code_create_dispatch.rb +1 -2
- data/lib/webhookdb/jobs/scheduled_backfills.rb +2 -2
- data/lib/webhookdb/jobs/send_invite.rb +3 -2
- data/lib/webhookdb/jobs/send_test_webhook.rb +1 -3
- data/lib/webhookdb/jobs/send_webhook.rb +4 -5
- data/lib/webhookdb/jobs/stale_row_deleter.rb +31 -0
- data/lib/webhookdb/jobs/sync_target_enqueue_scheduled.rb +3 -0
- data/lib/webhookdb/jobs/sync_target_run_sync.rb +9 -15
- data/lib/webhookdb/jobs/{webhook_subscription_delivery_attempt.rb → webhook_subscription_delivery_event.rb} +5 -8
- data/lib/webhookdb/liquid/expose.rb +1 -1
- data/lib/webhookdb/liquid/filters.rb +1 -1
- data/lib/webhookdb/liquid/partial.rb +2 -2
- data/lib/webhookdb/logged_webhook/resilient.rb +3 -3
- data/lib/webhookdb/logged_webhook.rb +16 -2
- data/lib/webhookdb/message/email_transport.rb +1 -1
- data/lib/webhookdb/message/transport.rb +1 -1
- data/lib/webhookdb/message.rb +55 -4
- data/lib/webhookdb/messages/error_generic_backfill.rb +47 -0
- data/lib/webhookdb/messages/error_icalendar_fetch.rb +5 -0
- data/lib/webhookdb/messages/error_signalwire_send_sms.rb +2 -0
- data/lib/webhookdb/messages/specs.rb +16 -0
- data/lib/webhookdb/organization/alerting.rb +56 -6
- data/lib/webhookdb/organization/database_migration.rb +2 -2
- data/lib/webhookdb/organization/db_builder.rb +5 -4
- data/lib/webhookdb/organization/error_handler.rb +141 -0
- data/lib/webhookdb/organization.rb +76 -10
- data/lib/webhookdb/postgres/model.rb +1 -0
- data/lib/webhookdb/postgres/model_utilities.rb +2 -0
- data/lib/webhookdb/postgres.rb +3 -4
- data/lib/webhookdb/replicator/base.rb +202 -68
- data/lib/webhookdb/replicator/base_stale_row_deleter.rb +165 -0
- data/lib/webhookdb/replicator/column.rb +2 -0
- data/lib/webhookdb/replicator/email_octopus_contact_v1.rb +0 -1
- data/lib/webhookdb/replicator/fake.rb +106 -88
- data/lib/webhookdb/replicator/front_signalwire_message_channel_app_v1.rb +131 -61
- data/lib/webhookdb/replicator/github_repo_v1_mixin.rb +17 -0
- data/lib/webhookdb/replicator/icalendar_calendar_v1.rb +197 -32
- data/lib/webhookdb/replicator/icalendar_event_v1.rb +20 -44
- data/lib/webhookdb/replicator/icalendar_event_v1_partitioned.rb +33 -0
- data/lib/webhookdb/replicator/intercom_contact_v1.rb +1 -0
- data/lib/webhookdb/replicator/intercom_conversation_v1.rb +1 -0
- data/lib/webhookdb/replicator/intercom_v1_mixin.rb +49 -6
- data/lib/webhookdb/replicator/partitionable_mixin.rb +116 -0
- data/lib/webhookdb/replicator/shopify_v1_mixin.rb +1 -1
- data/lib/webhookdb/replicator/signalwire_message_v1.rb +31 -1
- data/lib/webhookdb/replicator/sponsy_v1_mixin.rb +1 -1
- data/lib/webhookdb/replicator/transistor_episode_stats_v1.rb +0 -1
- data/lib/webhookdb/replicator/transistor_episode_v1.rb +11 -5
- data/lib/webhookdb/replicator/webhook_request.rb +8 -0
- data/lib/webhookdb/replicator.rb +6 -3
- data/lib/webhookdb/service/helpers.rb +4 -0
- data/lib/webhookdb/service/middleware.rb +6 -2
- data/lib/webhookdb/service/view_api.rb +1 -1
- data/lib/webhookdb/service.rb +10 -10
- data/lib/webhookdb/service_integration.rb +19 -1
- data/lib/webhookdb/signalwire.rb +1 -1
- data/lib/webhookdb/spec_helpers/async.rb +0 -4
- data/lib/webhookdb/spec_helpers/sentry.rb +32 -0
- data/lib/webhookdb/spec_helpers/shared_examples_for_replicators.rb +239 -64
- data/lib/webhookdb/spec_helpers.rb +1 -0
- data/lib/webhookdb/sync_target.rb +202 -34
- data/lib/webhookdb/system_log_event.rb +9 -0
- data/lib/webhookdb/tasks/admin.rb +1 -1
- data/lib/webhookdb/tasks/annotate.rb +1 -1
- data/lib/webhookdb/tasks/db.rb +13 -1
- data/lib/webhookdb/tasks/docs.rb +1 -1
- data/lib/webhookdb/tasks/fixture.rb +1 -1
- data/lib/webhookdb/tasks/message.rb +1 -1
- data/lib/webhookdb/tasks/regress.rb +1 -1
- data/lib/webhookdb/tasks/release.rb +1 -1
- data/lib/webhookdb/tasks/sidekiq.rb +1 -1
- data/lib/webhookdb/tasks/specs.rb +1 -1
- data/lib/webhookdb/version.rb +1 -1
- data/lib/webhookdb/webhook_subscription.rb +3 -4
- data/lib/webhookdb.rb +34 -8
- metadata +114 -64
- data/lib/webhookdb/jobs/customer_created_notify_internal.rb +0 -22
- data/lib/webhookdb/jobs/organization_database_migration_notify_finished.rb +0 -21
- data/lib/webhookdb/jobs/organization_database_migration_notify_started.rb +0 -21
- /data/lib/webhookdb/jobs/{logged_webhook_resilient_replay.rb → logged_webhooks_resilient_replay.rb} +0 -0
- /data/lib/webhookdb/jobs/{webhook_resource_notify_integrations.rb → webhookdb_resource_notify_integrations.rb} +0 -0
```diff
@@ -62,7 +62,7 @@ class Webhookdb::Replicator::Base
   # and the arguments used to upsert it (arguments to upsert_webhook),
   # and should return the body string to respond back with.
   #
-  # @param [Hash] upserted
+  # @param [Hash,Array] upserted
   # @param [Webhookdb::Replicator::WebhookRequest] request
   # @return [String]
   def synchronous_processing_response_body(upserted:, request:)
@@ -318,10 +318,12 @@ for information on how to refresh data.)
   # Find a dependent service integration with the given service name.
   # If none are found, return nil. If multiple are found, raise,
   # as this should only be used for automatically managed integrations.
+  # @param service_name [String,Array<String>]
   # @return [Webhookdb::ServiceIntegration,nil]
   def find_dependent(service_name)
-
-
+    names = service_name.respond_to?(:to_ary) ? service_name : [service_name]
+    sints = self.service_integration.dependents.filter { |si| names.include?(si.service_name) }
+    raise Webhookdb::InvalidPrecondition, "there are multiple #{names.join('/')} integrations in dependents" if
       sints.length > 1
     return sints.first
   end
@@ -356,7 +358,9 @@ for information on how to refresh data.)
     columns << self.data_column
     adapter = Webhookdb::DBAdapter::PG.new
     result = Webhookdb::Replicator::SchemaModification.new
-
+    create_table = adapter.create_table_sql(table, columns, if_not_exists:, partition: self.partitioning)
+    result.transaction_statements << create_table
+    result.transaction_statements.concat(self.create_table_partitions(adapter))
     self.indices(table).each do |dbindex|
       result.transaction_statements << adapter.create_index_sql(dbindex, concurrently: false)
     end
@@ -364,33 +368,69 @@ for information on how to refresh data.)
     return result
   end
 
+  # True if the replicator uses partitioning.
+  def partition? = false
+  # Non-nil only if +partition?+ is true.
+  # @return [Webhookdb::DBAdapter::Partitioning,nil]
+  def partitioning = nil
+
+  # Return the partitions belonging to the table.
+  # Return an empty array if this replicator is not partitioned.
+  # @return [Array<Webhookdb::DBAdapter::Partition>]
+  def existing_partitions(_db)
+    raise NotImplementedError if self.partition?
+    return []
+  end
+
+  def create_table_partitions(adapter)
+    return [] unless self.partition?
+    # We only need create_table partitions when we create the table.
+    # Range partitions would be created on demand, when inserting rows and the partition doesn't exist.
+    return [] unless self.partitioning.by == Webhookdb::DBAdapter::Partitioning::HASH
+
+    max_partition = self.service_integration.partition_value
+    raise Webhookdb::InvalidPrecondition, "partition value must be positive" unless max_partition.positive?
+    stmts = (0...max_partition).map do |i|
+      adapter.create_hash_partition_sql(self.dbadapter_table, max_partition, i)
+    end
+    return stmts
+  end
+
   # We need to give indices a persistent name, unique across the schema,
   # since multiple indices within a schema cannot share a name.
   #
   # Note that in certain RDBMS (Postgres) index names cannot exceed a certian length;
   # Postgres will silently truncate them. This can result in an index not being created
-  # if it shares the same name as another index and we use 'CREATE INDEX IF NOT EXISTS.'
+  # if it shares the same name as another index, and we use 'CREATE INDEX IF NOT EXISTS.'
   #
   # To avoid this, if the generated name exceeds a certain size, an md5 hash of the column names is used.
   #
   # @param columns [Array<Webhookdb::DBAdapter::Column, Webhookdb::Replicator::Column>] Must respond to :name.
+  # @param identifier [String,nil] Use this instead of a combination of column names.
+  #   Only use this where multiple indices are needed for the same columns, but something like the 'where'
+  #   condition is different.
   # @return [String]
-  protected def index_name(columns)
+  protected def index_name(columns, identifier: nil)
     raise Webhookdb::InvalidPrecondition, "sint needs an opaque id" if self.service_integration.opaque_id.blank?
     colnames = columns.map(&:name).join("_")
     opaque_id = self.service_integration.opaque_id
     # Handle old IDs without the leading 'svi_'.
     opaque_id = "idx#{opaque_id}" if /\d/.match?(opaque_id[0])
-
-    if
-
-
-
-
-
-
-
-
+
+    if identifier
+      name = "#{opaque_id}_#{identifier}_idx"
+    else
+      name = "#{opaque_id}_#{colnames}_idx"
+      if name.size > MAX_INDEX_NAME_LENGTH
+        # We don't have the 32 extra chars for a full md5 hash.
+        # We can't convert to Base64 or whatever, since we don't want to depend on case sensitivity.
+        # So just lop off a few characters (normally 2) from the end of the md5.
+        # The collision space is so small (some combination of column names would need to have the
+        # same md5, which is unfathomable), we're not really worried about it.
+        colnames_md5 = Digest::MD5.hexdigest(colnames)
+        available_chars = MAX_INDEX_NAME_LENGTH - "#{opaque_id}__idx".size
+        name = "#{opaque_id}_#{colnames_md5[...available_chars]}_idx"
+      end
     end
     raise Webhookdb::InvariantViolation, "index names cannot exceed 63 chars, got #{name.size} in '#{name}'" if
       name.size > 63
@@ -406,7 +446,12 @@ for information on how to refresh data.)
 
   # @return [Webhookdb::DBAdapter::Column]
   def remote_key_column
-
+    c = self._remote_key_column
+    if c.index?
+      msg = "_remote_key_column index:true should not be set, since it automatically gets a unique index"
+      Kernel.warn msg
+    end
+    return c.to_dbadapter(unique: true, nullable: false, index: false)
   end
 
   # @return [Webhookdb::DBAdapter::Column]
@@ -465,6 +510,9 @@ for information on how to refresh data.)
   # Each integration needs a single remote key, like the Shopify order id for shopify orders,
   # or sid for Twilio resources. This column must be unique for the table, like a primary key.
   #
+  # NOTE: Do not set index:true. The remote key column always must be unique,
+  # so it gets a unique index automatically.
+  #
   # @abstract
   # @return [Webhookdb::Replicator::Column]
   def _remote_key_column
@@ -495,9 +543,16 @@ for information on how to refresh data.)
     end
     self._extra_index_specs.each do |spec|
       targets = spec.columns.map { |n| dba_cols_by_name.fetch(n) }
-      idx_name = self.index_name(targets)
+      idx_name = self.index_name(targets, identifier: spec.identifier)
       result << Webhookdb::DBAdapter::Index.new(name: idx_name.to_sym, table:, targets:, where: spec.where)
     end
+    index_names = result.map(&:name)
+    if (dupes = index_names.find_all.with_index { |n, idx| idx != index_names.rindex(n) }).any?
+      msg = "Duplicate index names detected. Use the 'name' attribute to differentiate: " +
+        dupes.map(&:to_s).join(", ")
+      raise Webhookdb::Replicator::BrokenSpecification, msg
+    end
+
     return result
   end
 
@@ -520,7 +575,7 @@ for information on how to refresh data.)
 
   # @return [Webhookdb::Replicator::SchemaModification]
   def ensure_all_columns_modification
-    existing_cols, existing_indices = nil
+    existing_cols, existing_indices, existing_partitions = nil
     max_pk = 0
     sint = self.service_integration
     self.admin_dataset do |ds|
@@ -531,6 +586,7 @@ for information on how to refresh data.)
         tablename: sint.table_name,
       ).select_map(:indexname).to_set
       max_pk = ds.max(:pk) || 0
+      existing_partitions = self.existing_partitions(ds.db)
     end
     adapter = Webhookdb::DBAdapter::PG.new
     table = self.dbadapter_table
@@ -577,7 +633,9 @@ for information on how to refresh data.)
     # Add missing indices. This should happen AFTER the UPDATE calls so the UPDATEs don't have to update indices.
     self.indices(table).map do |index|
       next if existing_indices.include?(index.name.to_s)
-      result.nontransaction_statements
+      result.nontransaction_statements.concat(
+        adapter.create_index_sqls(index, concurrently: true, partitions: existing_partitions),
+      )
     end
 
     result.application_database_statements << sint.ensure_sequence_sql if self.requires_sequence?
@@ -641,6 +699,7 @@ for information on how to refresh data.)
   # like when we have to take different action based on a request method.
   #
   # @param body [Hash]
+  # @return [Array,Hash] Inserted rows, or array of inserted rows if many.
   def upsert_webhook_body(body, **kw)
     return self.upsert_webhook(Webhookdb::Replicator::WebhookRequest.new(body:), **kw)
   end
@@ -649,10 +708,14 @@ for information on how to refresh data.)
   # NOT a Rack::Request.
   #
   # @param [Webhookdb::Replicator::WebhookRequest] request
+  # @return [Array,Hash] Inserted rows, or array of inserted rows if many.
   def upsert_webhook(request, **kw)
     return self._upsert_webhook(request, **kw)
+  rescue Amigo::Retry::Error
+    # Do not log this since it's expected/handled by Amigo
+    raise
   rescue StandardError => e
-    self.logger.error("upsert_webhook_error", request: request.as_json,
+    self.logger.error("upsert_webhook_error", {request: request.as_json}, e)
     raise
   end
 
@@ -661,9 +724,23 @@ for information on how to refresh data.)
   #
   # @param request [Webhookdb::Replicator::WebhookRequest]
   # @param upsert [Boolean] If false, just return what would be upserted.
+  # @return [Array,Hash] Inserted rows, or array of inserted rows if many.
   def _upsert_webhook(request, upsert: true)
-
-    return nil if
+    resource_or_list, event = self._resource_and_event(request)
+    return nil if resource_or_list.nil?
+    if resource_or_list.is_a?(Array)
+      unless event.nil?
+        msg = "resource_and_event cannot return an array of resources with a non-nil event"
+        raise Webhookdb::InvalidPostcondition, msg
+      end
+      return resource_or_list.map do |resource|
+        self._upsert_webhook_single_resource(request, resource:, event:, upsert:)
+      end
+    end
+    return self._upsert_webhook_single_resource(request, resource: resource_or_list, event:, upsert:)
+  end
+
+  def _upsert_webhook_single_resource(request, resource:, event:, upsert:)
     enrichment = self._fetch_enrichment(resource, event, request)
     prepared = self._prepare_for_insert(resource, event, request, enrichment)
     raise Webhookdb::InvalidPostcondition if prepared.key?(:data)
@@ -673,12 +750,11 @@ for information on how to refresh data.)
     inserting[:enrichment] = self._to_json(enrichment) if self._store_enrichment_body?
     inserting.merge!(prepared)
     return inserting unless upsert
-    remote_key_col = self._remote_key_column
     updating = self._upsert_update_expr(inserting, enrichment:)
     update_where = self._update_where_expr
     upserted_rows = self.admin_dataset(timeout: :fast) do |ds|
       ds.insert_conflict(
-        target:
+        target: self._upsert_conflict_target,
         update: updating,
         update_where:,
       ).insert(inserting)
@@ -689,6 +765,12 @@ for information on how to refresh data.)
     return inserting
   end
 
+  # The target for ON CONFLICT. Usually the remote key column name,
+  # except if the remote id is a compound unique index, like for partitioned tables.
+  # Can be a symbol, array of symbols representing the column names, a +Sequel.lit+, etc.
+  # See +Sequel::Dataset.insert_conflict+ :target option for details.
+  def _upsert_conflict_target = self._remote_key_column.name
+
   # The NULL ASCII character (\u0000), when present in a string ("\u0000"),
   # and then encoded into JSON ("\\u0000") is invalid in PG JSONB- its strings cannot contain NULLs
   # (note that JSONB does not store the encoded string verbatim, it parses it into PG types, and a PG string
@@ -793,7 +875,7 @@ for information on how to refresh data.)
   #
   # @abstract
   # @param [Webhookdb::Replicator::WebhookRequest] request
-  # @return [Array<Hash>,nil]
+  # @return [Array<Hash,Array>,nil]
   def _resource_and_event(request)
     raise NotImplementedError
   end
@@ -903,10 +985,10 @@ for information on how to refresh data.)
   # - The table OID for this replicator
   # - The given key
   #
-  # Note this
+  # Note this establishes a new DB connection for the advisory lock;
   # we have had issues with advisory locks on reused connections,
   # and this is safer than having a lock that is never released.
-
+  def with_advisory_lock(key, &)
     url = self.service_integration.organization.admin_connection_url_raw
     got = nil
     Webhookdb::Dbutil.borrow_conn(url) do |conn|
@@ -966,7 +1048,7 @@ for information on how to refresh data.)
     rescue TypeError, NoMethodError => e
       # if we don't incur an HTTP error, but do incur an Error due to differences in the shapes of anticipated
       # response data in the `fetch_backfill_page` function, we can assume that the credentials are okay
-      self.logger.info "verify_backfill_credentials_expected_failure",
+      self.logger.info "verify_backfill_credentials_expected_failure", e
       return CredentialVerificationResult.new(verified: true, message: "")
     end
     return CredentialVerificationResult.new(verified: true, message: "")
@@ -999,42 +1081,18 @@ for information on how to refresh data.)
     job.update(started_at: Time.now)
 
     backfillers = self._backfillers(**job.criteria.symbolize_keys)
-
-
-
-
-
-      # Initialize a sized array to avoid any potential race conditions (though GIL should make it not an issue?).
-      errors = Array.new(backfillers.size)
-      backfillers.each_with_index do |bf, idx|
-        pool.post do
-          bf.backfill(last_backfilled)
-        rescue StandardError => e
-          errors[idx] = e
-        end
-      end
-      # We've enqueued all backfillers; do not accept anymore work.
-      pool.shutdown
-      loop do
-        # We want to stop early if we find an error, so check for errors every 10 seconds.
-        completed = pool.wait_for_termination(10)
-        first_error = errors.find { |e| !e.nil? }
-        if first_error.nil?
-          # No error, and wait_for_termination returned true, so all work is done.
-          break if completed
-          # No error, but work is still going on, so loop again.
-          next
-        end
-        # We have an error; don't run any more backfillers.
-        pool.kill
-        # Wait for all ongoing backfills before raising.
-        pool.wait_for_termination
-        raise first_error
+    begin
+      if self._parallel_backfill && self._parallel_backfill > 1
+        _do_parallel_backfill(backfillers, last_backfilled)
+      else
+        _do_serial_backfill(backfillers, last_backfilled)
       end
-
-
-
+    rescue StandardError => e
+      if self.on_backfill_error(e) == true
+        job.update(finished_at: Time.now)
+        return
       end
+      raise e
     end
 
     sint.update(last_backfilled_at: new_last_backfilled) if job.incremental?
@@ -1042,6 +1100,54 @@ for information on how to refresh data.)
     job.enqueue_children
   end
 
+  protected def _do_parallel_backfill(backfillers, last_backfilled)
+    # Create a dedicated threadpool for these backfillers,
+    # with max parallelism determined by the replicator.
+    pool = Concurrent::FixedThreadPool.new(self._parallel_backfill)
+    # Record any errors that occur, since they won't raise otherwise.
+    # Initialize a sized array to avoid any potential race conditions (though GIL should make it not an issue?).
+    errors = Array.new(backfillers.size)
+    backfillers.each_with_index do |bf, idx|
+      pool.post do
+        bf.backfill(last_backfilled)
+      rescue StandardError => e
+        errors[idx] = e
+      end
+    end
+    # We've enqueued all backfillers; do not accept anymore work.
+    pool.shutdown
+    loop do
+      # We want to stop early if we find an error, so check for errors every 10 seconds.
+      completed = pool.wait_for_termination(10)
+      first_error = errors.find { |e| !e.nil? }
+      if first_error.nil?
+        # No error, and wait_for_termination returned true, so all work is done.
+        break if completed
+        # No error, but work is still going on, so loop again.
+        next
+      end
+      # We have an error; don't run any more backfillers.
+      pool.kill
+      # Wait for all ongoing backfills before raising.
+      pool.wait_for_termination
+      raise first_error
+    end
+  end
+
+  protected def _do_serial_backfill(backfillers, last_backfilled)
+    backfillers.each do |backfiller|
+      backfiller.backfill(last_backfilled)
+    end
+  end
+
+  # Called when the #backfill method errors.
+  # This can do something like dispatch a developer alert.
+  # The handler must raise in order to stop the job from processing-
+  # if nothing is raised, the original exception will be raised instead.
+  # By default, this method noops, so the original exception is raised.
+  # @param e [Exception]
+  def on_backfill_error(e) = nil
+
   # If this replicator supports backfilling in parallel (running multiple backfillers at a time),
   # return the degree of paralellism (or nil if not running in parallel).
   # We leave parallelism up to the replicator, not CPU count, since most work
@@ -1096,15 +1202,15 @@ for information on how to refresh data.)
 
     def fetch_backfill_page(pagination_token, last_backfilled:)
       return @svc._fetch_backfill_page(pagination_token, last_backfilled:)
-    rescue ::Timeout::Error, ::SocketError
-      self.__retryordie
+    rescue ::Timeout::Error, ::SocketError => e
+      self.__retryordie(e)
    rescue Webhookdb::Http::Error => e
-      self.__retryordie if e.status >= 500
+      self.__retryordie(e) if e.status >= 500
      raise
    end
 
-    def __retryordie
-      raise Amigo::Retry::OrDie.new(self.server_error_retries, self.server_error_backoff)
+    def __retryordie(e)
+      raise Amigo::Retry::OrDie.new(self.server_error_retries, self.server_error_backoff, e)
    end
  end
 
@@ -1163,6 +1269,34 @@ or leave blank to choose the first option.
     return self._webhook_endpoint
   end
 
+  # Avoid writes under the following conditions:
+  #
+  # - A table lock is taken on the table
+  # - A vacuum is in progress on the table
+  #
+  # Of course, in most situations we want to write anyway,
+  # but there are some cases (lower-priority replicators for example)
+  # where we can reschedule the job to happen in the future instead.
+  def avoid_writes?
+    # We will need to handle this differently when not under Postgres, but for now,
+    # just assume Postgres.
+    # Find the admin URL for the organization's server (NOT the organization admin url, it can't see system processes).
+    # Then check for 1) vacuums in progress, 2) locks.
+    self.service_integration.organization.readonly_connection do |db|
+      count = db[:pg_locks].
+        join(:pg_class, {oid: :relation}).
+        join(:pg_namespace, {oid: :relnamespace}).
+        where(
+          locktype: "relation",
+          nspname: self.service_integration.organization.replication_schema,
+          relname: self.service_integration.table_name,
+          mode: ["ShareUpdateExclusiveLock", "ExclusiveLock", "AccessExclusiveLock"],
+        ).limit(1).count
+      return true if count&.positive?
+    end
+    return false
+  end
+
   protected def _webhook_endpoint
     return self.service_integration.unauthed_webhook_endpoint
   end
```
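Among the `Webhookdb::Replicator::Base` changes above, backfill error handling is now routed through an overridable `on_backfill_error` hook: `backfill` rescues the error, and if the hook returns `true` the job is marked finished instead of re-raising. A minimal sketch of how a subclass might use it; the class name and the 401 check are illustrative assumptions, not code from the gem:

```ruby
# Hypothetical replicator subclass. Only the on_backfill_error contract
# (return true => Base#backfill finishes the job quietly) comes from the
# diff above; everything else here is illustrative.
class ExampleReplicator < Webhookdb::Replicator::Base
  def on_backfill_error(e)
    # Only swallow upstream auth failures; anything else keeps raising.
    return nil unless e.is_a?(Webhookdb::Http::Error) && e.status == 401
    self.logger.warn("example_backfill_auth_failed", error_class: e.class.name)
    true
  end
end
```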
```diff
@@ -0,0 +1,165 @@
+# frozen_string_literal: true
+
+# Delete stale rows (like cancelled calendar events) not updated (row_updated_at or whatever column)
+# in the window between +stale_at+ back to +lookback_window+.
+# This avoids endlessly adding to a table where we expect rows to become stale over time.
+class Webhookdb::Replicator::BaseStaleRowDeleter
+  # @return [Webhookdb::Replicator::Base]
+  attr_reader :replicator
+
+  def initialize(replicator)
+    @replicator = replicator
+  end
+
+  # When a row is considered 'stale'.
+  # For example, a value of +35.days+ would treat any row older than 35 days as stale.
+  # @return [ActiveSupport::Duration]
+  def stale_at
+    raise NotImplementedError
+  end
+
+  # How far from +stale_at+ to "look back" for stale rows.
+  # We cannot just use "row_updated_at < stale_at" since this would scan ALL the rows
+  # every time we delete rows. Instead, we only want to scale rows where
+  # "row_updated_at < stale_at AND row_updated_at > (stale_at - lookback_window)".
+  # For example, a +stale_at+ of 20 days and a +lookback_window+ of 7 days
+  # would look to delete rows 20 to 27 days old.
+  #
+  # If the stale row deleter is run daily, a good lookback window would be 2-3 days,
+  # since as long as the job is running we shouldn't find rows that aren't cleaned up.
+  #
+  # Use +run_initial+ to do a full table scan,
+  # which may be necessary when running this feature for a table for the first time.
+  # @return [ActiveSupport::Duration]
+  def lookback_window
+    raise NotImplementedError
+  end
+
+  # Name of the column, like +:row_updated_at+.
+  # @return [Symbol]
+  def updated_at_column
+    raise NotImplementedError
+  end
+
+  # Other additional 'stale' conditions, like {status: 'cancelled'}
+  # @return [Hash]
+  def stale_condition
+    raise NotImplementedError
+  end
+
+  # The row delete is done in chunks to avoid long locks.
+  # The default seems safe, but it's exposed if you need to play around with it,
+  # and can be done via configuration if needed at some point.
+  # @return [Integer]
+  def chunk_size = 10_000
+
+  # How small should the incremental lookback window be? See +run+ for details.
+  # A size of 1 hour, and a lookback window of 2 days, would yield at least 48 delete queries.
+  def incremental_lookback_size = 1.hour
+
+  # Run the deleter.
+  # @param lookback_window [nil,ActiveSupport::Duration] The lookback window
+  #   (how many days before +stale_cutoff+ to look for rows). Use +nil+ to look for all rows.
+  def run(lookback_window: self.lookback_window)
+    # The algorithm to delete stale rows is complex for a couple of reasons.
+    # The native solution is "delete rows where updated_at > (stale_at - lookback_window) AND updated_at < stale_at"
+    # However, this would cause a single massive query over the entire candidate row space,
+    # which has problems:
+    # - The query can be very slow
+    # - Deadlocks can happen due to the slow query.
+    # - If the query is interrupted (due to a worker restart), all progress is lost.
+    # - Scanning the large 'updated at timestamp' index can cause the database to do a sequential scan.
+    #
+    # Instead, we need to do issue a series of fast queries over small 'updated at' windows:
+    #
+    # - Break the lookback period into hour-long windows.
+    #   If the lookback_window is 2 days, this would issue 48 queries.
+    #   But each one would be very fast, since the column is indexed.
+    # - For each small window, delete in chunks, like:
+    #     DELETE from "public"."icalendar_event_v1_aaaa"
+    #     WHERE pk IN (
+    #       SELECT pk FROM "public"."icalendar_event_v1_aaaa"
+    #       WHERE row_updated_at >= (hour start)
+    #       AND row_updated_at < (hour end)
+    #       LIMIT (chunk size)
+    #     )
+    # - Issue each DELETE within a transaction with seqscan disabled.
+    #   This is crude, but we know for our usage case that we never want a seqscan.
+    # - Using the chunked delete with the hour-long (small-sized) windows
+    #   is important. Because each chunk requires scanning potentially the entire indexed row space,
+    #   it would take longer and longer to find 10k rows to fill the chunk.
+    #   This is, for example, the same performance problem that OFFSET/LIMIT pagination
+    #   has at later pages (but not earlier pages).
+    self.replicator.admin_dataset do |ds|
+      stale_window_late = Time.now - self.stale_at
+      stale_window_early = lookback_window.nil? ? ds.min(self.updated_at_column) : stale_window_late - lookback_window
+      # If we are querying the whole table (no lookback window), and have no rows,
+      # there's nothing to clean up.
+      break if stale_window_early.nil?
+
+      # We must disable vacuuming for this sort of cleanup.
+      # Otherwise, it will take a LONG time since we use a series of short deletes.
+      self.set_autovacuum(ds.db, false)
+      if self.replicator.partition?
+        # If the replicator is partitioned, we need to delete stale rows on partition separately.
+        # We DELETE with a LIMIT in chunks, but when we run this on the main table, it'll run the query
+        # on every partition BEFORE applying the limit. You'll see this manifest with speed,
+        # but also the planner using a sequential scan for the delete, rather than hitting an index.
+        # Instead, DELETE from each partition in chunks, which will use the indices, and apply the limit properly.
+        self.replicator.existing_partitions(ds.db).each do |p|
+          pdb = ds.db[self.replicator.qualified_table_sequel_identifier(table: p.partition_name)]
+          self._run_delete(pdb, stale_window_early:, stale_window_late:)
+        end
+      else
+        self._run_delete(ds, stale_window_early:, stale_window_late:)
+      end
+    end
+  ensure
+    # Open a new connection in case the previous one is trashed for whatever reason.
+    self.replicator.admin_dataset do |ds|
+      self.set_autovacuum(ds.db, true)
+    end
+  end
+
+  def _run_delete(ds, stale_window_early:, stale_window_late:)
+    base_ds = ds.where(self.stale_condition).limit(self.chunk_size).select(:pk)
+    window_start = stale_window_early
+    until window_start >= stale_window_late
+      window_end = window_start + self.incremental_lookback_size
+      inner_ds = base_ds.where(self.updated_at_column => window_start..window_end)
+      loop do
+        # Due to conflicts where a feed is being inserted while the delete is happening,
+        # this may raise an error like:
+        #   deadlock detected
+        #   DETAIL: Process 18352 waits for ShareLock on transaction 435085606; blocked by process 24191.
+        #   Process 24191 waits for ShareLock on transaction 435085589; blocked by process 18352.
+        #   HINT: See server log for query details.
+        #   CONTEXT: while deleting tuple (2119119,3) in relation "icalendar_event_v1_aaaa"
+        # So we don't explicitly handle deadlocks, but could if it becomes an issue.
+        delete_ds = ds.where(pk: inner_ds)
+        # Disable seqscan for the delete. We can end up with seqscans if the planner decides
+        # it's a better choice given the 'updated at' index, but for our purposes we know
+        # we never want to use it (the impact is negligible on small tables,
+        # and catastrophic on large tables).
+        sql_lines = [
+          "BEGIN",
+          "SET LOCAL enable_seqscan='off'",
+          delete_ds.delete_sql,
+          "COMMIT",
+        ]
+        deleted = ds.db << sql_lines.join(";\n")
+        break if deleted != self.chunk_size
+      end
+      window_start = window_end
+    end
+  end
+
+  def set_autovacuum(db, on)
+    return if self.replicator.partition?
+    arg = on ? "on" : "off"
+    db << "ALTER TABLE #{self.replicator.schema_and_table_symbols.join('.')} SET (autovacuum_enabled='#{arg}')"
+  end
+
+  # Run with +lookback_window+ as +nil+, which does a full table scan.
+  def run_initial = self.run(lookback_window: nil)
+end
```
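The abstract methods above define everything a concrete deleter needs. A sketch of what a subclass could look like; the class name, durations, column, and status value are illustrative assumptions rather than the gem's actual deleters:

```ruby
# Hypothetical subclass of the BaseStaleRowDeleter shown above.
# The overridden values are made up for illustration.
class ExampleCancelledEventStaleRowDeleter < Webhookdb::Replicator::BaseStaleRowDeleter
  def stale_at = 35.days       # rows untouched for 35+ days are candidates
  def lookback_window = 3.days # each run only scans the 35-to-38-day-old slice
  def updated_at_column = :row_updated_at

  def stale_condition
    {status: "CANCELLED"}
  end
end

# Assumed wiring: run daily for the incremental pass, or once with
# run_initial to sweep the whole table when first enabling the feature.
#   deleter = ExampleCancelledEventStaleRowDeleter.new(replicator)
#   deleter.run
#   deleter.run_initial
```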
```diff
@@ -349,6 +349,8 @@ class Webhookdb::Replicator::Column
 
   # If provided, use this expression as the UPDATE value when adding the column
   # to an existing table.
+  # To explicitly backfill using NULL, use the value +Sequel[nil]+
+  # rather than +nil+.
   # @return [String,Sequel,Sequel::SQL::Expression]
   attr_reader :backfill_expr
 
```
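For context on the `backfill_expr` note above: the option is set when declaring a replicator column. A hedged sketch of a declaration that backfills existing rows with an explicit NULL; the `:legacy_note` column and its placement inside a replicator's `_denormalized_columns` are assumptions for illustration:

```ruby
# Illustrative only: a made-up column inside a hypothetical replicator,
# showing Sequel[nil] versus plain nil for backfill_expr.
def _denormalized_columns
  [
    Webhookdb::Replicator::Column.new(
      :legacy_note,
      TEXT,
      # Sequel[nil] backfills existing rows with an explicit NULL;
      # leaving this as plain nil would mean "no backfill expression".
      backfill_expr: Sequel[nil],
    ),
  ]
end
```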