webhookdb 1.3.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/admin-dist/assets/{index-6aebf805.js → index-9306dd28.js} +39 -39
- data/admin-dist/index.html +1 -1
- data/data/messages/templates/errors/generic_backfill.email.liquid +30 -0
- data/data/messages/templates/errors/icalendar_fetch.email.liquid +8 -2
- data/data/messages/templates/specs/with_fields.email.liquid +6 -0
- data/db/migrations/026_undo_integration_backfill_cursor.rb +2 -0
- data/db/migrations/032_remove_db_defaults.rb +2 -0
- data/db/migrations/043_text_search.rb +2 -0
- data/db/migrations/045_system_log.rb +15 -0
- data/db/migrations/046_indices.rb +14 -0
- data/db/migrations/047_sync_parallelism.rb +9 -0
- data/db/migrations/048_sync_stats.rb +9 -0
- data/db/migrations/049_error_handlers.rb +18 -0
- data/db/migrations/050_logged_webhook_indices.rb +25 -0
- data/db/migrations/051_partitioning.rb +9 -0
- data/integration/async_spec.rb +0 -2
- data/integration/service_integrations_spec.rb +0 -2
- data/lib/amigo/durable_job.rb +2 -2
- data/lib/amigo/job_in_context.rb +12 -0
- data/lib/webhookdb/admin.rb +6 -0
- data/lib/webhookdb/admin_api/data_provider.rb +1 -0
- data/lib/webhookdb/admin_api/entities.rb +8 -0
- data/lib/webhookdb/aggregate_result.rb +1 -1
- data/lib/webhookdb/api/entities.rb +6 -2
- data/lib/webhookdb/api/error_handlers.rb +104 -0
- data/lib/webhookdb/api/helpers.rb +25 -1
- data/lib/webhookdb/api/icalproxy.rb +22 -0
- data/lib/webhookdb/api/install.rb +2 -1
- data/lib/webhookdb/api/organizations.rb +6 -0
- data/lib/webhookdb/api/saved_queries.rb +1 -0
- data/lib/webhookdb/api/saved_views.rb +1 -0
- data/lib/webhookdb/api/service_integrations.rb +2 -1
- data/lib/webhookdb/api/sync_targets.rb +1 -1
- data/lib/webhookdb/api/system.rb +5 -0
- data/lib/webhookdb/api/webhook_subscriptions.rb +1 -0
- data/lib/webhookdb/api.rb +4 -1
- data/lib/webhookdb/apps.rb +4 -0
- data/lib/webhookdb/async/autoscaler.rb +10 -0
- data/lib/webhookdb/async/job.rb +4 -0
- data/lib/webhookdb/async/scheduled_job.rb +4 -0
- data/lib/webhookdb/async.rb +2 -0
- data/lib/webhookdb/backfiller.rb +17 -4
- data/lib/webhookdb/concurrent.rb +96 -0
- data/lib/webhookdb/connection_cache.rb +57 -10
- data/lib/webhookdb/console.rb +1 -1
- data/lib/webhookdb/customer/reset_code.rb +1 -1
- data/lib/webhookdb/customer.rb +5 -4
- data/lib/webhookdb/database_document.rb +1 -1
- data/lib/webhookdb/db_adapter/default_sql.rb +1 -14
- data/lib/webhookdb/db_adapter/partition.rb +14 -0
- data/lib/webhookdb/db_adapter/partitioning.rb +8 -0
- data/lib/webhookdb/db_adapter/pg.rb +77 -5
- data/lib/webhookdb/db_adapter/snowflake.rb +15 -6
- data/lib/webhookdb/db_adapter.rb +25 -3
- data/lib/webhookdb/dbutil.rb +2 -0
- data/lib/webhookdb/errors.rb +34 -0
- data/lib/webhookdb/fixtures/logged_webhooks.rb +4 -0
- data/lib/webhookdb/fixtures/organization_error_handlers.rb +20 -0
- data/lib/webhookdb/http.rb +30 -16
- data/lib/webhookdb/icalendar.rb +30 -9
- data/lib/webhookdb/jobs/amigo_test_jobs.rb +1 -1
- data/lib/webhookdb/jobs/backfill.rb +21 -25
- data/lib/webhookdb/jobs/create_mirror_table.rb +3 -4
- data/lib/webhookdb/jobs/deprecated_jobs.rb +3 -0
- data/lib/webhookdb/jobs/emailer.rb +2 -1
- data/lib/webhookdb/jobs/front_signalwire_message_channel_sync_inbound.rb +15 -0
- data/lib/webhookdb/jobs/icalendar_delete_stale_cancelled_events.rb +7 -2
- data/lib/webhookdb/jobs/icalendar_enqueue_syncs.rb +74 -11
- data/lib/webhookdb/jobs/icalendar_enqueue_syncs_for_urls.rb +22 -0
- data/lib/webhookdb/jobs/icalendar_sync.rb +21 -9
- data/lib/webhookdb/jobs/increase_event_handler.rb +3 -2
- data/lib/webhookdb/jobs/{logged_webhook_replay.rb → logged_webhooks_replay.rb} +5 -3
- data/lib/webhookdb/jobs/message_dispatched.rb +1 -0
- data/lib/webhookdb/jobs/model_event_system_log_tracker.rb +112 -0
- data/lib/webhookdb/jobs/monitor_metrics.rb +29 -0
- data/lib/webhookdb/jobs/organization_database_migration_notify.rb +32 -0
- data/lib/webhookdb/jobs/organization_database_migration_run.rb +4 -6
- data/lib/webhookdb/jobs/organization_error_handler_dispatch.rb +26 -0
- data/lib/webhookdb/jobs/prepare_database_connections.rb +1 -0
- data/lib/webhookdb/jobs/process_webhook.rb +11 -12
- data/lib/webhookdb/jobs/renew_watch_channel.rb +10 -10
- data/lib/webhookdb/jobs/replication_migration.rb +5 -2
- data/lib/webhookdb/jobs/reset_code_create_dispatch.rb +1 -2
- data/lib/webhookdb/jobs/scheduled_backfills.rb +2 -2
- data/lib/webhookdb/jobs/send_invite.rb +3 -2
- data/lib/webhookdb/jobs/send_test_webhook.rb +1 -3
- data/lib/webhookdb/jobs/send_webhook.rb +4 -5
- data/lib/webhookdb/jobs/stale_row_deleter.rb +31 -0
- data/lib/webhookdb/jobs/sync_target_enqueue_scheduled.rb +3 -0
- data/lib/webhookdb/jobs/sync_target_run_sync.rb +9 -15
- data/lib/webhookdb/jobs/{webhook_subscription_delivery_attempt.rb → webhook_subscription_delivery_event.rb} +5 -8
- data/lib/webhookdb/liquid/expose.rb +1 -1
- data/lib/webhookdb/liquid/filters.rb +1 -1
- data/lib/webhookdb/liquid/partial.rb +2 -2
- data/lib/webhookdb/logged_webhook/resilient.rb +3 -3
- data/lib/webhookdb/logged_webhook.rb +16 -2
- data/lib/webhookdb/message/email_transport.rb +1 -1
- data/lib/webhookdb/message/transport.rb +1 -1
- data/lib/webhookdb/message.rb +55 -4
- data/lib/webhookdb/messages/error_generic_backfill.rb +47 -0
- data/lib/webhookdb/messages/error_icalendar_fetch.rb +5 -0
- data/lib/webhookdb/messages/error_signalwire_send_sms.rb +2 -0
- data/lib/webhookdb/messages/specs.rb +16 -0
- data/lib/webhookdb/organization/alerting.rb +56 -6
- data/lib/webhookdb/organization/database_migration.rb +2 -2
- data/lib/webhookdb/organization/db_builder.rb +5 -4
- data/lib/webhookdb/organization/error_handler.rb +141 -0
- data/lib/webhookdb/organization.rb +76 -10
- data/lib/webhookdb/postgres/model.rb +1 -0
- data/lib/webhookdb/postgres/model_utilities.rb +2 -0
- data/lib/webhookdb/postgres.rb +3 -4
- data/lib/webhookdb/replicator/base.rb +202 -68
- data/lib/webhookdb/replicator/base_stale_row_deleter.rb +165 -0
- data/lib/webhookdb/replicator/column.rb +2 -0
- data/lib/webhookdb/replicator/email_octopus_contact_v1.rb +0 -1
- data/lib/webhookdb/replicator/fake.rb +106 -88
- data/lib/webhookdb/replicator/front_signalwire_message_channel_app_v1.rb +131 -61
- data/lib/webhookdb/replicator/github_repo_v1_mixin.rb +17 -0
- data/lib/webhookdb/replicator/icalendar_calendar_v1.rb +197 -32
- data/lib/webhookdb/replicator/icalendar_event_v1.rb +20 -44
- data/lib/webhookdb/replicator/icalendar_event_v1_partitioned.rb +33 -0
- data/lib/webhookdb/replicator/intercom_contact_v1.rb +1 -0
- data/lib/webhookdb/replicator/intercom_conversation_v1.rb +1 -0
- data/lib/webhookdb/replicator/intercom_v1_mixin.rb +49 -6
- data/lib/webhookdb/replicator/partitionable_mixin.rb +116 -0
- data/lib/webhookdb/replicator/shopify_v1_mixin.rb +1 -1
- data/lib/webhookdb/replicator/signalwire_message_v1.rb +31 -1
- data/lib/webhookdb/replicator/sponsy_v1_mixin.rb +1 -1
- data/lib/webhookdb/replicator/transistor_episode_stats_v1.rb +0 -1
- data/lib/webhookdb/replicator/transistor_episode_v1.rb +11 -5
- data/lib/webhookdb/replicator/webhook_request.rb +8 -0
- data/lib/webhookdb/replicator.rb +6 -3
- data/lib/webhookdb/service/helpers.rb +4 -0
- data/lib/webhookdb/service/middleware.rb +6 -2
- data/lib/webhookdb/service/view_api.rb +1 -1
- data/lib/webhookdb/service.rb +10 -10
- data/lib/webhookdb/service_integration.rb +19 -1
- data/lib/webhookdb/signalwire.rb +1 -1
- data/lib/webhookdb/spec_helpers/async.rb +0 -4
- data/lib/webhookdb/spec_helpers/sentry.rb +32 -0
- data/lib/webhookdb/spec_helpers/shared_examples_for_replicators.rb +239 -64
- data/lib/webhookdb/spec_helpers.rb +1 -0
- data/lib/webhookdb/sync_target.rb +202 -34
- data/lib/webhookdb/system_log_event.rb +9 -0
- data/lib/webhookdb/tasks/admin.rb +1 -1
- data/lib/webhookdb/tasks/annotate.rb +1 -1
- data/lib/webhookdb/tasks/db.rb +13 -1
- data/lib/webhookdb/tasks/docs.rb +1 -1
- data/lib/webhookdb/tasks/fixture.rb +1 -1
- data/lib/webhookdb/tasks/message.rb +1 -1
- data/lib/webhookdb/tasks/regress.rb +1 -1
- data/lib/webhookdb/tasks/release.rb +1 -1
- data/lib/webhookdb/tasks/sidekiq.rb +1 -1
- data/lib/webhookdb/tasks/specs.rb +1 -1
- data/lib/webhookdb/version.rb +1 -1
- data/lib/webhookdb/webhook_subscription.rb +3 -4
- data/lib/webhookdb.rb +34 -8
- metadata +114 -64
- data/lib/webhookdb/jobs/customer_created_notify_internal.rb +0 -22
- data/lib/webhookdb/jobs/organization_database_migration_notify_finished.rb +0 -21
- data/lib/webhookdb/jobs/organization_database_migration_notify_started.rb +0 -21
- /data/lib/webhookdb/jobs/{logged_webhook_resilient_replay.rb → logged_webhooks_resilient_replay.rb} +0 -0
- /data/lib/webhookdb/jobs/{webhook_resource_notify_integrations.rb → webhookdb_resource_notify_integrations.rb} +0 -0
@@ -74,6 +74,10 @@ The secret to use for signing is:
|
|
74
74
|
col.new(:row_updated_at, TIMESTAMP, index: true, optional: true, defaulter: :now),
|
75
75
|
col.new(:last_synced_at, TIMESTAMP, index: true, optional: true),
|
76
76
|
col.new(:ics_url, TEXT, converter: col.converter_gsub("^webcal", "https")),
|
77
|
+
col.new(:event_count, INTEGER, optional: true),
|
78
|
+
col.new(:feed_bytes, INTEGER, optional: true),
|
79
|
+
col.new(:last_sync_duration_ms, INTEGER, optional: true),
|
80
|
+
col.new(:last_fetch_context, OBJECT, optional: true),
|
77
81
|
]
|
78
82
|
end
|
79
83
|
|
@@ -108,7 +112,7 @@ The secret to use for signing is:
|
|
108
112
|
external_id = request.body.fetch("external_id")
|
109
113
|
case request_type
|
110
114
|
when "SYNC"
|
111
|
-
super
|
115
|
+
super
|
112
116
|
Webhookdb::Jobs::IcalendarSync.perform_async(self.service_integration.id, external_id)
|
113
117
|
return
|
114
118
|
when "DELETE"
|
@@ -118,14 +122,12 @@ The secret to use for signing is:
|
|
118
122
|
unless Webhookdb::RACK_ENV == "test"
|
119
123
|
raise "someone tried to use the special unit test google event type outside of unit tests"
|
120
124
|
end
|
121
|
-
return super
|
125
|
+
return super
|
122
126
|
else
|
123
127
|
raise ArgumentError, "Unknown request type: #{request_type}"
|
124
128
|
end
|
125
129
|
end
|
126
130
|
|
127
|
-
CLEANUP_SERVICE_NAMES = ["icalendar_event_v1"].freeze
|
128
|
-
|
129
131
|
def rows_needing_sync(dataset, now: Time.now)
|
130
132
|
cutoff = now - Webhookdb::Icalendar.sync_period_hours.hours
|
131
133
|
return dataset.where(Sequel[last_synced_at: nil] | Sequel.expr { last_synced_at < cutoff })
|
@@ -133,7 +135,7 @@ The secret to use for signing is:
|
|
133
135
|
|
134
136
|
def delete_data_for_external_id(external_id)
|
135
137
|
relevant_integrations = self.service_integration.recursive_dependents.
|
136
|
-
filter { |d|
|
138
|
+
filter { |d| Webhookdb::Icalendar::EVENT_REPLICATORS.include?(d.service_name) }
|
137
139
|
self.admin_dataset do |ds|
|
138
140
|
ds.db.transaction do
|
139
141
|
ds.where(external_id:).delete
|
@@ -154,7 +156,7 @@ The secret to use for signing is:
|
|
154
156
|
@now = now
|
155
157
|
end
|
156
158
|
|
157
|
-
def upsert_page_size =
|
159
|
+
def upsert_page_size = 2000
|
158
160
|
def conditional_upsert? = true
|
159
161
|
|
160
162
|
def prepare_body(body)
|
@@ -163,14 +165,50 @@ The secret to use for signing is:
|
|
163
165
|
end
|
164
166
|
end
|
165
167
|
|
166
|
-
def sync_row(row)
|
168
|
+
def sync_row(row, force: false, now: Time.now)
|
167
169
|
Appydays::Loggable.with_log_tags(icalendar_url: row.fetch(:ics_url)) do
|
170
|
+
last_synced_at = row.fetch(:last_synced_at)
|
171
|
+
should_sync = force ||
|
172
|
+
last_synced_at.nil? ||
|
173
|
+
# If a proxy is configured, we always want to try to sync,
|
174
|
+
# since this could have come from a webhook, but also the proxy feed refresh TTL
|
175
|
+
# is likely much lower than ICALENDAR_SYNC_PERIOD_HOURS so it's good to check on it.
|
176
|
+
# The check is very fast (should 304) so is safe to do relatively often.
|
177
|
+
Webhookdb::Icalendar.proxy_url.present? ||
|
178
|
+
last_synced_at < (now - Webhookdb::Icalendar.sync_period_hours.hours)
|
179
|
+
unless should_sync
|
180
|
+
self.logger.info("skip_sync_recently_synced", last_synced_at:)
|
181
|
+
return
|
182
|
+
end
|
168
183
|
self.with_advisory_lock(row.fetch(:pk)) do
|
169
|
-
|
170
|
-
if (dep = self.find_dependent(
|
171
|
-
|
184
|
+
start = Time.now
|
185
|
+
if (dep = self.find_dependent(Webhookdb::Icalendar::EVENT_REPLICATORS))
|
186
|
+
if dep.replicator.avoid_writes?
|
187
|
+
# Check if this table is being vacuumed/etc. We use this instead of a semaphore job,
|
188
|
+
# since it's a better fit for icalendar, which is pre-scheduled, rather than reactive.
|
189
|
+
# That is, when we receive webhooks, a semaphore job gives us a more predictable rate;
|
190
|
+
# but icalendar rate is negotiated in advance (when enqueing jobs),
|
191
|
+
# and we can be more 'helpful' to something like a vacuum by not running any jobs at all.
|
192
|
+
self.logger.info("skip_sync_table_locked")
|
193
|
+
raise Amigo::Retry::Retry, 60.seconds + (rand * 10.seconds)
|
194
|
+
end
|
195
|
+
processor = self._sync_row(row, dep, now:)
|
196
|
+
end
|
197
|
+
self.admin_dataset do |ds|
|
198
|
+
ds.where(pk: row.fetch(:pk)).
|
199
|
+
update(
|
200
|
+
last_synced_at: now,
|
201
|
+
event_count: processor&.upserted_identities&.count,
|
202
|
+
feed_bytes: processor&.read_bytes,
|
203
|
+
last_sync_duration_ms: (Time.now - start).in_milliseconds,
|
204
|
+
last_fetch_context: {
|
205
|
+
"hash" => processor&.feed_hash,
|
206
|
+
"content_type" => processor&.headers&.fetch("Content-Type", nil),
|
207
|
+
"content_length" => processor&.headers&.fetch("Content-Length", nil),
|
208
|
+
"etag" => processor&.headers&.fetch("Etag", nil),
|
209
|
+
}.to_json,
|
210
|
+
)
|
172
211
|
end
|
173
|
-
self.admin_dataset { |ds| ds.where(pk: row.fetch(:pk)).update(last_synced_at: now) }
|
174
212
|
end
|
175
213
|
end
|
176
214
|
end
|
@@ -179,14 +217,19 @@ The secret to use for signing is:
|
|
179
217
|
calendar_external_id = row.fetch(:external_id)
|
180
218
|
begin
|
181
219
|
request_url = self._clean_ics_url(row.fetch(:ics_url))
|
182
|
-
io =
|
183
|
-
rescue Down::Error,
|
220
|
+
io = self._make_ics_request(request_url, row.fetch(:last_fetch_context))
|
221
|
+
rescue Down::Error,
|
222
|
+
URI::InvalidURIError,
|
223
|
+
HTTPX::NativeResolveError,
|
224
|
+
HTTPX::InsecureRedirectError,
|
225
|
+
HTTPX::Connection::HTTP2::Error,
|
226
|
+
EOFError => e
|
184
227
|
self._handle_down_error(e, request_url:, calendar_external_id:)
|
185
228
|
return
|
186
229
|
end
|
187
230
|
|
188
231
|
upserter = Upserter.new(dep.replicator, calendar_external_id, now:)
|
189
|
-
processor = EventProcessor.new(io
|
232
|
+
processor = EventProcessor.new(io:, upserter:, headers: io.data[:headers])
|
190
233
|
processor.process
|
191
234
|
# Delete all the extra replicator rows, and cancel all the rows that weren't upserted.
|
192
235
|
dep.replicator.admin_dataset do |ds|
|
@@ -204,6 +247,25 @@ The secret to use for signing is:
|
|
204
247
|
row_updated_at: now,
|
205
248
|
)
|
206
249
|
end
|
250
|
+
return processor
|
251
|
+
end
|
252
|
+
|
253
|
+
def _make_ics_request(request_url, last_fetch_context)
|
254
|
+
# Some servers require a VERY explicit accept header,
|
255
|
+
# so tell them we prefer icalendar here.
|
256
|
+
# Using Httpx, Accept-Encoding is gzip,deflate
|
257
|
+
# which seems fine (server should use identity as worst case).
|
258
|
+
headers = {
|
259
|
+
"Accept" => "text/calendar,*/*",
|
260
|
+
}
|
261
|
+
headers["If-None-Match"] = last_fetch_context["etag"] if last_fetch_context & ["etag"]
|
262
|
+
if (proxy_url = Webhookdb::Icalendar.proxy_url).present?
|
263
|
+
request_url = "#{proxy_url.delete_suffix('/')}/?url=#{URI.encode_www_form_component(request_url)}"
|
264
|
+
headers["Authorization"] = "Apikey #{Webhookdb::Icalendar.proxy_api_key}" if
|
265
|
+
Webhookdb::Icalendar.proxy_api_key.present?
|
266
|
+
end
|
267
|
+
resp = Webhookdb::Http.chunked_download(request_url, rewindable: false, headers:)
|
268
|
+
return resp
|
207
269
|
end
|
208
270
|
|
209
271
|
# We get all sorts of strange urls, fix up what we can.
|
@@ -224,13 +286,31 @@ The secret to use for signing is:
|
|
224
286
|
self.logger.info("icalendar_fetch_not_modified", response_status: 304, request_url:, calendar_external_id:)
|
225
287
|
return
|
226
288
|
when Down::SSLError
|
227
|
-
|
228
|
-
|
289
|
+
# Most SSL errors are transient and can be retried, but some are due to a long-term misconfiguration.
|
290
|
+
# Handle these with an alert, like if we had a 404, which indicates a longer-term issue.
|
291
|
+
is_fatal =
|
292
|
+
# There doesn't appear to be a way to allow unsafe legacy content negotiation on a per-request basis,
|
293
|
+
# it is compiled into OpenSSL (may be wrong about this).
|
294
|
+
e.to_s.include?("unsafe legacy renegotiation disabled") ||
|
295
|
+
# Certificate failures are not transient
|
296
|
+
e.to_s.include?("certificate verify failed")
|
297
|
+
if is_fatal
|
298
|
+
response_status = 0
|
299
|
+
response_body = e.to_s
|
300
|
+
else
|
301
|
+
self._handle_retryable_down_error!(e, request_url:, calendar_external_id:)
|
302
|
+
end
|
303
|
+
when Down::TimeoutError, Down::ConnectionError, Down::InvalidUrl,
|
304
|
+
Errno::ECONNRESET,
|
305
|
+
URI::InvalidURIError,
|
306
|
+
HTTPX::NativeResolveError, HTTPX::InsecureRedirectError,
|
307
|
+
HTTPX::Connection::HTTP2::Error,
|
308
|
+
EOFError
|
229
309
|
response_status = 0
|
230
310
|
response_body = e.to_s
|
231
311
|
when Down::ClientError
|
232
312
|
raise e if e.response.nil?
|
233
|
-
response_status = e.response.
|
313
|
+
response_status = e.response.status.to_i
|
234
314
|
self._handle_retryable_down_error!(e, request_url:, calendar_external_id:) if
|
235
315
|
self._retryable_client_error?(e, request_url:)
|
236
316
|
# These are all the errors we've seen, we can't do anything about.
|
@@ -242,25 +322,32 @@ The secret to use for signing is:
|
|
242
322
|
404, 405, # Fundamental issues with the URL given
|
243
323
|
409, 410, # More access problems
|
244
324
|
417, # If someone uses an Outlook HTML calendar, fetch gives us a 417
|
325
|
+
422, # Sometimes used instead of 404
|
245
326
|
429, # Usually 429s are retried (as above), but in some cases they're not.
|
327
|
+
500, 503, 504, # Intermittent server issues, usually
|
328
|
+
599, # Represents a timeout in icalproxy
|
246
329
|
]
|
247
330
|
# For most client errors, we can't do anything about it. For example,
|
248
331
|
# and 'unshared' URL could result in a 401, 403, 404, or even a 405.
|
249
332
|
# For now, other client errors, we can raise on,
|
250
333
|
# in case it's something we can fix/work around.
|
251
334
|
# For example, it's possible something like a 415 is a WebhookDB issue.
|
335
|
+
if response_status == 421 && (origin_err = e.response.headers["Ical-Proxy-Origin-Error"])
|
336
|
+
response_status = origin_err.to_i
|
337
|
+
end
|
252
338
|
raise e unless expected_errors.include?(response_status)
|
253
|
-
response_body = e
|
339
|
+
response_body = self._safe_read_body(e)
|
254
340
|
when Down::ServerError
|
255
|
-
response_status = e.response.
|
256
|
-
response_body = e
|
341
|
+
response_status = e.response.status.to_i
|
342
|
+
response_body = self._safe_read_body(e)
|
257
343
|
else
|
258
344
|
response_body = nil
|
259
345
|
response_status = nil
|
260
346
|
end
|
261
347
|
raise e if response_status.nil?
|
348
|
+
loggable_body = response_body && response_body[..256]
|
262
349
|
self.logger.warn("icalendar_fetch_error",
|
263
|
-
response_body
|
350
|
+
response_body: loggable_body, response_status:, request_url:, calendar_external_id:,)
|
264
351
|
message = Webhookdb::Messages::ErrorIcalendarFetch.new(
|
265
352
|
self.service_integration,
|
266
353
|
calendar_external_id,
|
@@ -269,11 +356,19 @@ The secret to use for signing is:
|
|
269
356
|
request_url:,
|
270
357
|
request_method: "GET",
|
271
358
|
)
|
272
|
-
self.service_integration.organization.alerting.dispatch_alert(message)
|
359
|
+
self.service_integration.organization.alerting.dispatch_alert(message, separate_connection: false)
|
360
|
+
end
|
361
|
+
|
362
|
+
# We can hit an error while reading the error body, since it was opened as a stream.
|
363
|
+
# Ignore those errors.
|
364
|
+
def _safe_read_body(e)
|
365
|
+
return e.response.body.to_s
|
366
|
+
rescue OpenSSL::SSL::SSLError, HTTPX::Error
|
367
|
+
return "<error reading body>"
|
273
368
|
end
|
274
369
|
|
275
370
|
def _retryable_client_error?(e, request_url:)
|
276
|
-
code = e.response.
|
371
|
+
code = e.response.status.to_i
|
277
372
|
# This is a bad domain that returns 429 for most requests.
|
278
373
|
# Tell the org admins it won't sync.
|
279
374
|
return false if code == 429 && request_url.start_with?("https://ical.schedulestar.com")
|
@@ -290,7 +385,7 @@ The secret to use for signing is:
|
|
290
385
|
retry_in = rand(4..60).minutes
|
291
386
|
self.logger.debug(
|
292
387
|
"icalendar_fetch_error_retry",
|
293
|
-
response_status: e.respond_to?(:response) ? e.response&.
|
388
|
+
response_status: e.respond_to?(:response) ? e.response&.status : 0,
|
294
389
|
request_url:,
|
295
390
|
calendar_external_id:,
|
296
391
|
retry_at: Time.now + retry_in,
|
@@ -299,11 +394,12 @@ The secret to use for signing is:
|
|
299
394
|
end
|
300
395
|
|
301
396
|
class EventProcessor
|
302
|
-
attr_reader :upserted_identities
|
397
|
+
attr_reader :upserted_identities, :read_bytes, :headers
|
303
398
|
|
304
|
-
def initialize(io
|
399
|
+
def initialize(io:, upserter:, headers:)
|
305
400
|
@io = io
|
306
401
|
@upserter = upserter
|
402
|
+
@headers = headers
|
307
403
|
# Keep track of everything we upsert. For any rows we aren't upserting,
|
308
404
|
# delete them if they're recurring, or cancel them if they're not recurring.
|
309
405
|
# If doing it this way is slow, we could invert this (pull down all IDs and pop from the set).
|
@@ -316,8 +412,14 @@ The secret to use for signing is:
|
|
316
412
|
# We need to keep track of how many events each UID spawns,
|
317
413
|
# so we can delete any with a higher count.
|
318
414
|
@max_sequence_num_by_uid = {}
|
415
|
+
# Keep track of the bytes we've read from the file.
|
416
|
+
# Never trust Content-Length headers for ical feeds.
|
417
|
+
@read_bytes = 0
|
418
|
+
@feed_md5 = Digest::MD5.new
|
319
419
|
end
|
320
420
|
|
421
|
+
def feed_hash = @feed_md5.hexdigest
|
422
|
+
|
321
423
|
def delete_condition
|
322
424
|
return nil if @max_sequence_num_by_uid.empty?
|
323
425
|
return @max_sequence_num_by_uid.map do |uid, n|
|
@@ -456,7 +558,14 @@ The secret to use for signing is:
|
|
456
558
|
# The new UID has the sequence number.
|
457
559
|
e["UID"] = {"v" => "#{uid}-#{idx}"}
|
458
560
|
e["DTSTART"] = self._ical_entry_from_ruby(occ.start_time, start_entry, is_date)
|
459
|
-
|
561
|
+
if has_end_time
|
562
|
+
if !is_date && end_entry["VALUE"] == "DATE"
|
563
|
+
# It's possible that DTSTART is a time, but DTEND is a date. This makes no sense,
|
564
|
+
# so skip setting an end date. It will be in the :data column at least.
|
565
|
+
else
|
566
|
+
e["DTEND"] = self._ical_entry_from_ruby(occ.end_time, end_entry, is_date)
|
567
|
+
end
|
568
|
+
end
|
460
569
|
yield e
|
461
570
|
final_sequence = idx
|
462
571
|
break if occ.start_time > dont_project_after
|
@@ -474,7 +583,15 @@ The secret to use for signing is:
|
|
474
583
|
def _ical_entry_from_ruby(r, entry, is_date)
|
475
584
|
return {"v" => r.strftime("%Y%m%d")} if is_date
|
476
585
|
return {"v" => r.strftime("%Y%m%dT%H%M%SZ")} if r.zone == "UTC"
|
477
|
-
|
586
|
+
tzid = entry["TZID"]
|
587
|
+
return {"v" => r.strftime("%Y%m%dT%H%M%S"), "TZID" => tzid} if tzid
|
588
|
+
value = entry.fetch("v")
|
589
|
+
return {"v" => value} if value.end_with?("Z")
|
590
|
+
if /^\d{8}T\d{6}$/.match?(value)
|
591
|
+
@upserter.upserting_replicator.logger.warn "ical_assuming_utc_time", ical_entry: entry, ruby_time: r
|
592
|
+
return {"v" => "#{value}Z"}
|
593
|
+
end
|
594
|
+
raise "Cannot create ical entry from: '#{r}', #{entry}"
|
478
595
|
end
|
479
596
|
|
480
597
|
def _icecube_rule_from_ical(ical)
|
@@ -483,11 +600,20 @@ The secret to use for signing is:
|
|
483
600
|
# IceCube errors, because `day_of_month` isn't valid on a WeeklyRule.
|
484
601
|
# In this case, we need to sanitize the string to remove the offending rule piece.
|
485
602
|
# There are probably many other offending formats, but we'll add them here as needed.
|
603
|
+
unambiguous_ical = nil
|
486
604
|
if ical.include?("FREQ=WEEKLY") && ical.include?("BYMONTHDAY=")
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
605
|
+
unambiguous_ical = ical.gsub(/BYMONTHDAY=[\d,]+/, "")
|
606
|
+
elsif ical.include?("FREQ=MONTHLY") && ical.include?("BYYEARDAY=") && ical.include?("BYMONTHDAY=")
|
607
|
+
# Another rule: FREQ=MONTHLY;INTERVAL=3;BYYEARDAY=14;BYMONTHDAY=14
|
608
|
+
# Apple interprets this as monthly on the 14th; rrule.js interprets this as never happening.
|
609
|
+
# 'day_of_year' isn't valid on a MonthlyRule, so delete the BYYEARDAY component.
|
610
|
+
unambiguous_ical = ical.gsub(/BYYEARDAY=[\d,]+/, "")
|
611
|
+
end
|
612
|
+
if unambiguous_ical
|
613
|
+
unambiguous_ical.delete_prefix! ";"
|
614
|
+
unambiguous_ical.delete_suffix! ";"
|
615
|
+
unambiguous_ical.squeeze!(";")
|
616
|
+
ical = unambiguous_ical
|
491
617
|
end
|
492
618
|
return IceCube::IcalParser.rule_from_ical(ical)
|
493
619
|
end
|
@@ -507,6 +633,8 @@ The secret to use for signing is:
|
|
507
633
|
vevent_lines = []
|
508
634
|
in_vevent = false
|
509
635
|
while (line = @io.gets)
|
636
|
+
@read_bytes += line.size
|
637
|
+
@feed_md5.update(line)
|
510
638
|
begin
|
511
639
|
line.rstrip!
|
512
640
|
rescue Encoding::CompatibilityError
|
@@ -545,4 +673,41 @@ The secret to use for signing is:
|
|
545
673
|
@upserter.upserting_replicator.logger.warn("invalid_vevent_hash", vevent_uids: bad_event_uids.sort)
|
546
674
|
end
|
547
675
|
end
|
676
|
+
|
677
|
+
# Return true if the data in the feed has changed from what was last synced,
|
678
|
+
# or false if it has not so the sync can be skipped.
|
679
|
+
# This operation is meant to be resource-light (most of the work is the HTTP request),
|
680
|
+
# so should be done in a threadpool.
|
681
|
+
#
|
682
|
+
# - If we have no previous fetch context, we sync.
|
683
|
+
# - If the fetch errors, sync, because we want the normal error handler to figure it out
|
684
|
+
# (alert admins, etc).
|
685
|
+
# - If the last fetch's content type and length is different from the current, we sync.
|
686
|
+
# - Download the bytes. If the hash of the bytes is different from what was last processed,
|
687
|
+
# sync. Since this involves reading the streaming body, we must return a copy of the body (a StringIO).
|
688
|
+
def feed_changed?(row)
|
689
|
+
last_fetch = row.fetch(:last_fetch_context)
|
690
|
+
return true if last_fetch.nil? || last_fetch.empty?
|
691
|
+
|
692
|
+
begin
|
693
|
+
url = self._clean_ics_url(row.fetch(:ics_url))
|
694
|
+
resp = self._make_ics_request(url, last_fetch)
|
695
|
+
rescue Down::NotModified
|
696
|
+
return false
|
697
|
+
rescue StandardError
|
698
|
+
return true
|
699
|
+
end
|
700
|
+
headers = resp.data[:headers] || {}
|
701
|
+
content_type_match = headers["Content-Type"] == last_fetch["content_type"] &&
|
702
|
+
headers["Content-Length"] == last_fetch["content_length"]
|
703
|
+
return true unless content_type_match
|
704
|
+
last_hash = last_fetch["hash"]
|
705
|
+
return true if last_hash.nil?
|
706
|
+
|
707
|
+
hash = Digest::MD5.new
|
708
|
+
while (line = resp.gets)
|
709
|
+
hash.update(line)
|
710
|
+
end
|
711
|
+
return hash.hexdigest != last_hash
|
712
|
+
end
|
548
713
|
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require "webhookdb/icalendar"
|
4
4
|
require "webhookdb/windows_tz"
|
5
|
+
require "webhookdb/replicator/base_stale_row_deleter"
|
5
6
|
|
6
7
|
class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
|
7
8
|
include Appydays::Loggable
|
@@ -115,7 +116,6 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
|
|
115
116
|
:compound_identity,
|
116
117
|
TEXT,
|
117
118
|
data_key: "<compound key, see converter>",
|
118
|
-
index: true,
|
119
119
|
converter: CONV_REMOTE_KEY,
|
120
120
|
optional: true, # This is done via the converter, data_key never exists
|
121
121
|
)
|
@@ -166,6 +166,7 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
|
|
166
166
|
data.delete("calendar_external_id")
|
167
167
|
data.delete("recurring_event_id")
|
168
168
|
data.delete("recurring_event_sequence")
|
169
|
+
data.delete("row_updated_at")
|
169
170
|
return data
|
170
171
|
end
|
171
172
|
|
@@ -215,11 +216,21 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
|
|
215
216
|
columns: [:calendar_external_id, :start_date, :end_date],
|
216
217
|
where: Sequel[:status].is_distinct_from("CANCELLED") & (Sequel[:start_date] !~ nil),
|
217
218
|
),
|
219
|
+
Webhookdb::Replicator::IndexSpec.new(
|
220
|
+
columns: [:row_updated_at],
|
221
|
+
where: Sequel[status: "CANCELLED"],
|
222
|
+
identifier: "cancelled_row_updated_at",
|
223
|
+
),
|
218
224
|
]
|
219
225
|
end
|
220
226
|
|
221
227
|
def _update_where_expr
|
222
|
-
|
228
|
+
# Compare against data to avoid the constant writes. JSONB != operations are very fast,
|
229
|
+
# so this should not be any real performance issue.
|
230
|
+
# last_modified_at is unreliable because LAST-MODIFIED is unreliable,
|
231
|
+
# even in feeds it is set. There are cases, such as adding an EXDATE to an RRULE,
|
232
|
+
# that do not trigger LAST-MODIFIED changes.
|
233
|
+
return self.qualified_table_sequel_identifier[:data] !~ Sequel[:excluded][:data]
|
223
234
|
end
|
224
235
|
|
225
236
|
# @param [Array<String>] lines
|
@@ -369,50 +380,15 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
|
|
369
380
|
# +stale_at+ to +age_cutoff+. This avoids endlessly adding to the icalendar events table
|
370
381
|
# due to feeds that change UIDs each fetch- events with changed UIDs will become CANCELLED,
|
371
382
|
# and then deleted over time.
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
# Since this routine should run regularly, we should rarely have events more than 35 or 36 days old,
|
378
|
-
# for example.
|
379
|
-
# Use +nil+ to use no limit (a full table scan) which may be necessary when running this feature
|
380
|
-
# for the first time.
|
381
|
-
# @param chunk_size [Integer] The row delete is done in chunks to avoid long locks.
|
382
|
-
# The default seems safe, but it's exposed as a parameter if you need to play around with it,
|
383
|
-
# and can be done via configuration if needed at some point.
|
384
|
-
def delete_stale_cancelled_events(
|
385
|
-
stale_at: Webhookdb::Icalendar.stale_cancelled_event_threshold_days.days.ago,
|
386
|
-
age_cutoff: (Webhookdb::Icalendar.stale_cancelled_event_threshold_days + 10).days.ago,
|
387
|
-
chunk_size: 10_000
|
388
|
-
)
|
389
|
-
# Delete in chunks, like:
|
390
|
-
# DELETE from "public"."icalendar_event_v1_aaaa"
|
391
|
-
# WHERE pk IN (
|
392
|
-
# SELECT pk FROM "public"."icalendar_event_v1_aaaa"
|
393
|
-
# WHERE row_updated_at < (now() - '35 days'::interval)
|
394
|
-
# LIMIT 10000
|
395
|
-
# )
|
396
|
-
age = age_cutoff..stale_at
|
397
|
-
self.admin_dataset do |ds|
|
398
|
-
chunk_ds = ds.where(row_updated_at: age, status: "CANCELLED").select(:pk).limit(chunk_size)
|
399
|
-
loop do
|
400
|
-
# Due to conflicts where a feed is being inserted while the delete is happening,
|
401
|
-
# this may raise an error like:
|
402
|
-
# deadlock detected
|
403
|
-
# DETAIL: Process 18352 waits for ShareLock on transaction 435085606; blocked by process 24191.
|
404
|
-
# Process 24191 waits for ShareLock on transaction 435085589; blocked by process 18352.
|
405
|
-
# HINT: See server log for query details.
|
406
|
-
# CONTEXT: while deleting tuple (2119119,3) in relation "icalendar_event_v1_aaaa"
|
407
|
-
# Unit testing this is very difficult though, and in practice it is rare,
|
408
|
-
# and normal Sidekiq job retries should be sufficient to handle this.
|
409
|
-
# So we don't explicitly handle deadlocks, but could if it becomes an issue.
|
410
|
-
deleted = ds.where(pk: chunk_ds).delete
|
411
|
-
break if deleted != chunk_size
|
412
|
-
end
|
413
|
-
end
|
383
|
+
class StaleRowDeleter < Webhookdb::Replicator::BaseStaleRowDeleter
|
384
|
+
def stale_at = Webhookdb::Icalendar.stale_cancelled_event_threshold_days.days
|
385
|
+
def lookback_window = Webhookdb::Icalendar.stale_cancelled_event_lookback_days.days
|
386
|
+
def updated_at_column = :row_updated_at
|
387
|
+
def stale_condition = {status: "CANCELLED"}
|
414
388
|
end
|
415
389
|
|
390
|
+
def stale_row_deleter = StaleRowDeleter.new(self)
|
391
|
+
|
416
392
|
def calculate_webhook_state_machine
|
417
393
|
if (step = self.calculate_dependency_state_machine_step(dependency_help: ""))
|
418
394
|
return step
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "webhookdb/replicator/icalendar_event_v1"
|
4
|
+
require "webhookdb/replicator/partitionable_mixin"
|
5
|
+
|
6
|
+
class Webhookdb::Replicator::IcalendarEventV1Partitioned < Webhookdb::Replicator::IcalendarEventV1
|
7
|
+
include Webhookdb::Replicator::PartitionableMixin
|
8
|
+
|
9
|
+
# @return [Webhookdb::Replicator::Descriptor]
|
10
|
+
def self.descriptor
|
11
|
+
return Webhookdb::Replicator::Descriptor.new(
|
12
|
+
name: "icalendar_event_v1_partitioned",
|
13
|
+
ctor: ->(sint) { self.new(sint) },
|
14
|
+
dependency_descriptor: Webhookdb::Replicator::IcalendarCalendarV1.descriptor,
|
15
|
+
feature_roles: ["partitioning_beta"],
|
16
|
+
resource_name_singular: "iCalendar Event",
|
17
|
+
supports_webhooks: true,
|
18
|
+
description: "Individual events in an icalendar, using partitioned tables rather than one big table. " \
|
19
|
+
"See icalendar_calendar_v1.",
|
20
|
+
api_docs_url: "https://icalendar.org/",
|
21
|
+
)
|
22
|
+
end
|
23
|
+
|
24
|
+
def _denormalized_columns
|
25
|
+
d = super
|
26
|
+
d << Webhookdb::Replicator::Column.new(:calendar_external_hash, INTEGER, optional: true)
|
27
|
+
return d
|
28
|
+
end
|
29
|
+
|
30
|
+
def partition_method = Webhookdb::DBAdapter::Partitioning::HASH
|
31
|
+
def partition_column_name = :calendar_external_hash
|
32
|
+
def partition_value(resource) = self._str2inthash(resource.fetch("calendar_external_id"))
|
33
|
+
end
|
@@ -40,6 +40,7 @@ class Webhookdb::Replicator::IntercomConversationV1 < Webhookdb::Replicator::Bas
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def _mixin_backfill_url = "https://api.intercom.io/conversations"
|
43
|
+
def _mixin_backfill_hashkey = "conversations"
|
43
44
|
|
44
45
|
def _resource_and_event(request)
|
45
46
|
resource, event = super
|
@@ -22,10 +22,9 @@ module Webhookdb::Replicator::IntercomV1Mixin
|
|
22
22
|
# webhook verification, which means that webhooks actually don't require any setup on the integration level. Thus,
|
23
23
|
# `supports_webhooks` is false.
|
24
24
|
def find_auth_integration
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
# rubocop:enable Naming/MemoizedInstanceVariableName
|
25
|
+
return @find_auth_integration ||= Webhookdb::Replicator.find_at_root!(
|
26
|
+
self.service_integration, service_name: "intercom_marketplace_root_v1",
|
27
|
+
)
|
29
28
|
end
|
30
29
|
|
31
30
|
def intercom_auth_headers
|
@@ -74,8 +73,9 @@ module Webhookdb::Replicator::IntercomV1Mixin
|
|
74
73
|
end
|
75
74
|
|
76
75
|
def _mixin_backfill_url = raise NotImplementedError
|
76
|
+
def _mixin_backfill_hashkey = raise NotImplementedError
|
77
77
|
|
78
|
-
def _fetch_backfill_page(pagination_token,
|
78
|
+
def _fetch_backfill_page(pagination_token, last_backfilled:)
|
79
79
|
unless self.auth_credentials?
|
80
80
|
raise Webhookdb::Replicator::CredentialsMissing,
|
81
81
|
"This integration requires that the Intercom Auth integration has a valid Auth Token"
|
@@ -93,6 +93,28 @@ module Webhookdb::Replicator::IntercomV1Mixin
|
|
93
93
|
timeout: Webhookdb::Intercom.http_timeout,
|
94
94
|
)
|
95
95
|
rescue Webhookdb::Http::Error => e
|
96
|
+
is_token_suspended = e.status == 401 &&
|
97
|
+
e.response["errors"].present? &&
|
98
|
+
e.response["errors"].any? { |er| er["code"] == "token_suspended" }
|
99
|
+
if is_token_suspended
|
100
|
+
root_sint = self.find_auth_integration
|
101
|
+
message = "Organization has closed their Intercom workspace and this integration should be deleted. " \
|
102
|
+
"From a console, run: " \
|
103
|
+
"Webhookdb::ServiceIntegration[#{root_sint.id}].destroy_self_and_all_dependents"
|
104
|
+
Webhookdb::DeveloperAlert.new(
|
105
|
+
subsystem: "Intercom Workspace Closed Error",
|
106
|
+
emoji: ":hook:",
|
107
|
+
fallback: message,
|
108
|
+
fields: [
|
109
|
+
{title: "Organization", value: root_sint.organization.name, short: true},
|
110
|
+
{title: "Integration ID", value: root_sint.id.to_s, short: true},
|
111
|
+
{title: "Instructions", value: message},
|
112
|
+
],
|
113
|
+
).emit
|
114
|
+
# Noop here since there's nothing to do; the developer alert takes care of notifying,
|
115
|
+
# so no need to error or log.
|
116
|
+
return [], nil
|
117
|
+
end
|
96
118
|
# We are looking to catch the "api plan restricted" error. This is always a 403 and every
|
97
119
|
# 403 will be an "api plan restricted" error according to the API documentation. Because we
|
98
120
|
# specify the API version in our headers we can expect that this won't change.
|
@@ -102,8 +124,29 @@ module Webhookdb::Replicator::IntercomV1Mixin
|
|
102
124
|
# a TypeError in the backfiller.
|
103
125
|
return [], nil
|
104
126
|
end
|
105
|
-
data = response.parsed_response.fetch(
|
127
|
+
data = response.parsed_response.fetch(self._mixin_backfill_hashkey)
|
106
128
|
starting_after = response.parsed_response.dig("pages", "next", "starting_after")
|
129
|
+
# Intercom pagination sorts by updated_at, newest first. So if we are doing an incremental sync (last_backfilled set),
|
130
|
+
# and the oldest item on this page was updated before we last backfilled, we can stop paginating.
|
131
|
+
if last_backfilled && data.last && data.last["updated_at"]
|
132
|
+
oldest_update = Time.at(data.last["updated_at"])
|
133
|
+
starting_after = nil if oldest_update < last_backfilled
|
134
|
+
end
|
107
135
|
return data, starting_after
|
108
136
|
end
|
137
|
+
|
138
|
+
def _backfillers
|
139
|
+
return [Backfiller.new(self)]
|
140
|
+
end
|
141
|
+
|
142
|
+
class Backfiller < Webhookdb::Replicator::Base::ServiceBackfiller
|
143
|
+
include Webhookdb::Backfiller::Bulk
|
144
|
+
|
145
|
+
# Upsert for each API call
|
146
|
+
def upsert_page_size = Webhookdb::Intercom.page_size
|
147
|
+
def prepare_body(_body) = nil
|
148
|
+
def upserting_replicator = self.svc
|
149
|
+
# We don't want to overwrite newer items from webhooks, so use conditional upsert.
|
150
|
+
def conditional_upsert? = true
|
151
|
+
end
|
109
152
|
end
|