webhookdb 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/db/migrations/026_undo_integration_backfill_cursor.rb +2 -0
- data/db/migrations/032_remove_db_defaults.rb +2 -0
- data/db/migrations/043_text_search.rb +2 -0
- data/db/migrations/047_sync_parallelism.rb +9 -0
- data/db/migrations/048_sync_stats.rb +9 -0
- data/db/migrations/049_error_handlers.rb +18 -0
- data/db/migrations/050_logged_webhook_indices.rb +25 -0
- data/db/migrations/051_partitioning.rb +9 -0
- data/integration/async_spec.rb +0 -2
- data/integration/service_integrations_spec.rb +0 -2
- data/lib/amigo/durable_job.rb +2 -2
- data/lib/amigo/job_in_context.rb +12 -0
- data/lib/webhookdb/api/entities.rb +6 -2
- data/lib/webhookdb/api/error_handlers.rb +104 -0
- data/lib/webhookdb/api/helpers.rb +8 -1
- data/lib/webhookdb/api/icalproxy.rb +22 -0
- data/lib/webhookdb/api/install.rb +2 -1
- data/lib/webhookdb/api/saved_queries.rb +1 -0
- data/lib/webhookdb/api/saved_views.rb +1 -0
- data/lib/webhookdb/api/service_integrations.rb +1 -1
- data/lib/webhookdb/api/sync_targets.rb +1 -1
- data/lib/webhookdb/api/system.rb +5 -0
- data/lib/webhookdb/api/webhook_subscriptions.rb +1 -0
- data/lib/webhookdb/api.rb +4 -1
- data/lib/webhookdb/apps.rb +4 -0
- data/lib/webhookdb/async/autoscaler.rb +10 -0
- data/lib/webhookdb/async/job.rb +4 -0
- data/lib/webhookdb/async/scheduled_job.rb +4 -0
- data/lib/webhookdb/async.rb +2 -0
- data/lib/webhookdb/backfiller.rb +17 -4
- data/lib/webhookdb/concurrent.rb +96 -0
- data/lib/webhookdb/connection_cache.rb +29 -8
- data/lib/webhookdb/customer.rb +2 -2
- data/lib/webhookdb/database_document.rb +1 -1
- data/lib/webhookdb/db_adapter/default_sql.rb +1 -14
- data/lib/webhookdb/db_adapter/partition.rb +14 -0
- data/lib/webhookdb/db_adapter/partitioning.rb +8 -0
- data/lib/webhookdb/db_adapter/pg.rb +77 -5
- data/lib/webhookdb/db_adapter/snowflake.rb +15 -6
- data/lib/webhookdb/db_adapter.rb +24 -2
- data/lib/webhookdb/fixtures/logged_webhooks.rb +4 -0
- data/lib/webhookdb/fixtures/organization_error_handlers.rb +20 -0
- data/lib/webhookdb/http.rb +29 -15
- data/lib/webhookdb/icalendar.rb +30 -9
- data/lib/webhookdb/jobs/amigo_test_jobs.rb +1 -1
- data/lib/webhookdb/jobs/backfill.rb +21 -25
- data/lib/webhookdb/jobs/create_mirror_table.rb +3 -4
- data/lib/webhookdb/jobs/deprecated_jobs.rb +2 -0
- data/lib/webhookdb/jobs/emailer.rb +2 -1
- data/lib/webhookdb/jobs/front_signalwire_message_channel_sync_inbound.rb +15 -0
- data/lib/webhookdb/jobs/icalendar_delete_stale_cancelled_events.rb +7 -2
- data/lib/webhookdb/jobs/icalendar_enqueue_syncs.rb +74 -11
- data/lib/webhookdb/jobs/icalendar_enqueue_syncs_for_urls.rb +22 -0
- data/lib/webhookdb/jobs/icalendar_sync.rb +21 -9
- data/lib/webhookdb/jobs/increase_event_handler.rb +3 -2
- data/lib/webhookdb/jobs/logged_webhooks_replay.rb +5 -3
- data/lib/webhookdb/jobs/message_dispatched.rb +1 -0
- data/lib/webhookdb/jobs/model_event_system_log_tracker.rb +7 -0
- data/lib/webhookdb/jobs/monitor_metrics.rb +1 -1
- data/lib/webhookdb/jobs/organization_database_migration_notify.rb +32 -0
- data/lib/webhookdb/jobs/organization_database_migration_run.rb +4 -6
- data/lib/webhookdb/jobs/organization_error_handler_dispatch.rb +26 -0
- data/lib/webhookdb/jobs/prepare_database_connections.rb +1 -0
- data/lib/webhookdb/jobs/process_webhook.rb +11 -12
- data/lib/webhookdb/jobs/renew_watch_channel.rb +7 -10
- data/lib/webhookdb/jobs/replication_migration.rb +5 -2
- data/lib/webhookdb/jobs/reset_code_create_dispatch.rb +1 -2
- data/lib/webhookdb/jobs/scheduled_backfills.rb +2 -2
- data/lib/webhookdb/jobs/send_invite.rb +3 -2
- data/lib/webhookdb/jobs/send_test_webhook.rb +1 -3
- data/lib/webhookdb/jobs/send_webhook.rb +4 -5
- data/lib/webhookdb/jobs/stale_row_deleter.rb +31 -0
- data/lib/webhookdb/jobs/sync_target_enqueue_scheduled.rb +3 -0
- data/lib/webhookdb/jobs/sync_target_run_sync.rb +9 -15
- data/lib/webhookdb/jobs/webhook_subscription_delivery_event.rb +5 -8
- data/lib/webhookdb/liquid/expose.rb +1 -1
- data/lib/webhookdb/liquid/filters.rb +1 -1
- data/lib/webhookdb/liquid/partial.rb +2 -2
- data/lib/webhookdb/logged_webhook/resilient.rb +3 -3
- data/lib/webhookdb/logged_webhook.rb +16 -2
- data/lib/webhookdb/message/email_transport.rb +1 -1
- data/lib/webhookdb/message.rb +2 -2
- data/lib/webhookdb/messages/error_generic_backfill.rb +2 -0
- data/lib/webhookdb/messages/error_icalendar_fetch.rb +2 -0
- data/lib/webhookdb/messages/error_signalwire_send_sms.rb +2 -0
- data/lib/webhookdb/organization/alerting.rb +50 -4
- data/lib/webhookdb/organization/database_migration.rb +1 -1
- data/lib/webhookdb/organization/db_builder.rb +4 -3
- data/lib/webhookdb/organization/error_handler.rb +141 -0
- data/lib/webhookdb/organization.rb +62 -9
- data/lib/webhookdb/postgres/model_utilities.rb +2 -0
- data/lib/webhookdb/postgres.rb +1 -3
- data/lib/webhookdb/replicator/base.rb +136 -29
- data/lib/webhookdb/replicator/base_stale_row_deleter.rb +165 -0
- data/lib/webhookdb/replicator/email_octopus_contact_v1.rb +0 -1
- data/lib/webhookdb/replicator/fake.rb +100 -88
- data/lib/webhookdb/replicator/front_signalwire_message_channel_app_v1.rb +105 -44
- data/lib/webhookdb/replicator/github_repo_v1_mixin.rb +17 -0
- data/lib/webhookdb/replicator/icalendar_calendar_v1.rb +144 -23
- data/lib/webhookdb/replicator/icalendar_event_v1.rb +20 -44
- data/lib/webhookdb/replicator/icalendar_event_v1_partitioned.rb +33 -0
- data/lib/webhookdb/replicator/intercom_contact_v1.rb +1 -0
- data/lib/webhookdb/replicator/intercom_conversation_v1.rb +1 -0
- data/lib/webhookdb/replicator/intercom_v1_mixin.rb +24 -2
- data/lib/webhookdb/replicator/partitionable_mixin.rb +116 -0
- data/lib/webhookdb/replicator/shopify_v1_mixin.rb +1 -1
- data/lib/webhookdb/replicator/signalwire_message_v1.rb +1 -2
- data/lib/webhookdb/replicator/sponsy_v1_mixin.rb +1 -1
- data/lib/webhookdb/replicator/transistor_episode_stats_v1.rb +0 -1
- data/lib/webhookdb/replicator.rb +4 -1
- data/lib/webhookdb/service/helpers.rb +4 -0
- data/lib/webhookdb/service/middleware.rb +6 -2
- data/lib/webhookdb/service_integration.rb +5 -0
- data/lib/webhookdb/signalwire.rb +1 -1
- data/lib/webhookdb/spec_helpers/async.rb +0 -4
- data/lib/webhookdb/spec_helpers/sentry.rb +32 -0
- data/lib/webhookdb/spec_helpers/shared_examples_for_replicators.rb +87 -1
- data/lib/webhookdb/spec_helpers.rb +1 -0
- data/lib/webhookdb/sync_target.rb +195 -29
- data/lib/webhookdb/tasks/admin.rb +1 -1
- data/lib/webhookdb/tasks/annotate.rb +1 -1
- data/lib/webhookdb/tasks/db.rb +13 -1
- data/lib/webhookdb/tasks/docs.rb +1 -1
- data/lib/webhookdb/tasks/fixture.rb +1 -1
- data/lib/webhookdb/tasks/message.rb +1 -1
- data/lib/webhookdb/tasks/regress.rb +1 -1
- data/lib/webhookdb/tasks/release.rb +1 -1
- data/lib/webhookdb/tasks/sidekiq.rb +1 -1
- data/lib/webhookdb/tasks/specs.rb +1 -1
- data/lib/webhookdb/version.rb +1 -1
- data/lib/webhookdb/webhook_subscription.rb +2 -3
- data/lib/webhookdb.rb +3 -1
- metadata +88 -54
- data/lib/webhookdb/jobs/organization_database_migration_notify_finished.rb +0 -21
- data/lib/webhookdb/jobs/organization_database_migration_notify_started.rb +0 -21
--- a/data/lib/webhookdb/replicator/icalendar_calendar_v1.rb
+++ b/data/lib/webhookdb/replicator/icalendar_calendar_v1.rb
@@ -77,6 +77,7 @@ The secret to use for signing is:
       col.new(:event_count, INTEGER, optional: true),
       col.new(:feed_bytes, INTEGER, optional: true),
       col.new(:last_sync_duration_ms, INTEGER, optional: true),
+      col.new(:last_fetch_context, OBJECT, optional: true),
     ]
   end
 
@@ -111,7 +112,7 @@ The secret to use for signing is:
     external_id = request.body.fetch("external_id")
     case request_type
       when "SYNC"
-        super
+        super
         Webhookdb::Jobs::IcalendarSync.perform_async(self.service_integration.id, external_id)
         return
       when "DELETE"
@@ -121,14 +122,12 @@ The secret to use for signing is:
         unless Webhookdb::RACK_ENV == "test"
           raise "someone tried to use the special unit test google event type outside of unit tests"
         end
-        return super
+        return super
       else
         raise ArgumentError, "Unknown request type: #{request_type}"
     end
   end
 
-  CLEANUP_SERVICE_NAMES = ["icalendar_event_v1"].freeze
-
   def rows_needing_sync(dataset, now: Time.now)
     cutoff = now - Webhookdb::Icalendar.sync_period_hours.hours
     return dataset.where(Sequel[last_synced_at: nil] | Sequel.expr { last_synced_at < cutoff })
@@ -136,7 +135,7 @@ The secret to use for signing is:
 
   def delete_data_for_external_id(external_id)
     relevant_integrations = self.service_integration.recursive_dependents.
-      filter { |d|
+      filter { |d| Webhookdb::Icalendar::EVENT_REPLICATORS.include?(d.service_name) }
     self.admin_dataset do |ds|
       ds.db.transaction do
         ds.where(external_id:).delete
@@ -157,7 +156,7 @@ The secret to use for signing is:
       @now = now
     end
 
-    def upsert_page_size =
+    def upsert_page_size = 2000
     def conditional_upsert? = true
 
     def prepare_body(body)
@@ -166,12 +165,33 @@ The secret to use for signing is:
     end
   end
 
-  def sync_row(row)
+  def sync_row(row, force: false, now: Time.now)
     Appydays::Loggable.with_log_tags(icalendar_url: row.fetch(:ics_url)) do
+      last_synced_at = row.fetch(:last_synced_at)
+      should_sync = force ||
+        last_synced_at.nil? ||
+        # If a proxy is configured, we always want to try to sync,
+        # since this could have come from a webhook, but also the proxy feed refresh TTL
+        # is likely much lower than ICALENDAR_SYNC_PERIOD_HOURS so it's good to check on it.
+        # The check is very fast (should 304) so is safe to do relatively often.
+        Webhookdb::Icalendar.proxy_url.present? ||
+        last_synced_at < (now - Webhookdb::Icalendar.sync_period_hours.hours)
+      unless should_sync
+        self.logger.info("skip_sync_recently_synced", last_synced_at:)
+        return
+      end
       self.with_advisory_lock(row.fetch(:pk)) do
         start = Time.now
-
-
+        if (dep = self.find_dependent(Webhookdb::Icalendar::EVENT_REPLICATORS))
+          if dep.replicator.avoid_writes?
+            # Check if this table is being vacuumed/etc. We use this instead of a semaphore job,
+            # since it's a better fit for icalendar, which is pre-scheduled, rather than reactive.
+            # That is, when we receive webhooks, a semaphore job gives us a more predictable rate;
+            # but icalendar rate is negotiated in advance (when enqueing jobs),
+            # and we can be more 'helpful' to something like a vacuum by not running any jobs at all.
+            self.logger.info("skip_sync_table_locked")
+            raise Amigo::Retry::Retry, 60.seconds + (rand * 10.seconds)
+          end
           processor = self._sync_row(row, dep, now:)
         end
         self.admin_dataset do |ds|
@@ -181,6 +201,12 @@ The secret to use for signing is:
             event_count: processor&.upserted_identities&.count,
             feed_bytes: processor&.read_bytes,
             last_sync_duration_ms: (Time.now - start).in_milliseconds,
+            last_fetch_context: {
+              "hash" => processor&.feed_hash,
+              "content_type" => processor&.headers&.fetch("Content-Type", nil),
+              "content_length" => processor&.headers&.fetch("Content-Length", nil),
+              "etag" => processor&.headers&.fetch("Etag", nil),
+            }.to_json,
           )
         end
       end
@@ -191,14 +217,19 @@ The secret to use for signing is:
     calendar_external_id = row.fetch(:external_id)
     begin
       request_url = self._clean_ics_url(row.fetch(:ics_url))
-      io =
-    rescue Down::Error,
+      io = self._make_ics_request(request_url, row.fetch(:last_fetch_context))
+    rescue Down::Error,
+           URI::InvalidURIError,
+           HTTPX::NativeResolveError,
+           HTTPX::InsecureRedirectError,
+           HTTPX::Connection::HTTP2::Error,
+           EOFError => e
       self._handle_down_error(e, request_url:, calendar_external_id:)
       return
     end
 
     upserter = Upserter.new(dep.replicator, calendar_external_id, now:)
-    processor = EventProcessor.new(io
+    processor = EventProcessor.new(io:, upserter:, headers: io.data[:headers])
     processor.process
     # Delete all the extra replicator rows, and cancel all the rows that weren't upserted.
     dep.replicator.admin_dataset do |ds|
@@ -219,6 +250,24 @@ The secret to use for signing is:
     return processor
   end
 
+  def _make_ics_request(request_url, last_fetch_context)
+    # Some servers require a VERY explicit accept header,
+    # so tell them we prefer icalendar here.
+    # Using Httpx, Accept-Encoding is gzip,deflate
+    # which seems fine (server should use identity as worst case).
+    headers = {
+      "Accept" => "text/calendar,*/*",
+    }
+    headers["If-None-Match"] = last_fetch_context["etag"] if last_fetch_context&.[]("etag")
+    if (proxy_url = Webhookdb::Icalendar.proxy_url).present?
+      request_url = "#{proxy_url.delete_suffix('/')}/?url=#{URI.encode_www_form_component(request_url)}"
+      headers["Authorization"] = "Apikey #{Webhookdb::Icalendar.proxy_api_key}" if
+        Webhookdb::Icalendar.proxy_api_key.present?
+    end
+    resp = Webhookdb::Http.chunked_download(request_url, rewindable: false, headers:)
+    return resp
+  end
+
   # We get all sorts of strange urls, fix up what we can.
   def _clean_ics_url(url)
     u = URI(url)
@@ -251,12 +300,17 @@ The secret to use for signing is:
       else
         self._handle_retryable_down_error!(e, request_url:, calendar_external_id:)
       end
-    when Down::TimeoutError, Down::ConnectionError, Down::InvalidUrl,
+    when Down::TimeoutError, Down::ConnectionError, Down::InvalidUrl,
+         Errno::ECONNRESET,
+         URI::InvalidURIError,
+         HTTPX::NativeResolveError, HTTPX::InsecureRedirectError,
+         HTTPX::Connection::HTTP2::Error,
+         EOFError
       response_status = 0
      response_body = e.to_s
    when Down::ClientError
      raise e if e.response.nil?
-      response_status = e.response.
+      response_status = e.response.status.to_i
      self._handle_retryable_down_error!(e, request_url:, calendar_external_id:) if
        self._retryable_client_error?(e, request_url:)
      # These are all the errors we've seen, we can't do anything about.
@@ -268,18 +322,24 @@ The secret to use for signing is:
         404, 405, # Fundamental issues with the URL given
         409, 410, # More access problems
         417, # If someone uses an Outlook HTML calendar, fetch gives us a 417
+        422, # Sometimes used instead of 404
         429, # Usually 429s are retried (as above), but in some cases they're not.
+        500, 503, 504, # Intermittent server issues, usually
+        599, # Represents a timeout in icalproxy
       ]
       # For most client errors, we can't do anything about it. For example,
       # and 'unshared' URL could result in a 401, 403, 404, or even a 405.
       # For now, other client errors, we can raise on,
       # in case it's something we can fix/work around.
       # For example, it's possible something like a 415 is a WebhookDB issue.
+      if response_status == 421 && (origin_err = e.response.headers["Ical-Proxy-Origin-Error"])
+        response_status = origin_err.to_i
+      end
       raise e unless expected_errors.include?(response_status)
-      response_body = e
+      response_body = self._safe_read_body(e)
     when Down::ServerError
-      response_status = e.response.
-      response_body = e
+      response_status = e.response.status.to_i
+      response_body = self._safe_read_body(e)
     else
       response_body = nil
       response_status = nil
@@ -299,8 +359,16 @@ The secret to use for signing is:
     self.service_integration.organization.alerting.dispatch_alert(message, separate_connection: false)
   end
 
+  # We can hit an error while reading the error body, since it was opened as a stream.
+  # Ignore those errors.
+  def _safe_read_body(e)
+    return e.response.body.to_s
+  rescue OpenSSL::SSL::SSLError, HTTPX::Error
+    return "<error reading body>"
+  end
+
   def _retryable_client_error?(e, request_url:)
-    code = e.response.
+    code = e.response.status.to_i
     # This is a bad domain that returns 429 for most requests.
     # Tell the org admins it won't sync.
     return false if code == 429 && request_url.start_with?("https://ical.schedulestar.com")
@@ -317,7 +385,7 @@ The secret to use for signing is:
     retry_in = rand(4..60).minutes
     self.logger.debug(
       "icalendar_fetch_error_retry",
-      response_status: e.respond_to?(:response) ? e.response&.
+      response_status: e.respond_to?(:response) ? e.response&.status : 0,
       request_url:,
       calendar_external_id:,
       retry_at: Time.now + retry_in,
@@ -326,11 +394,12 @@ The secret to use for signing is:
   end
 
   class EventProcessor
-    attr_reader :upserted_identities, :read_bytes
+    attr_reader :upserted_identities, :read_bytes, :headers
 
-    def initialize(io
+    def initialize(io:, upserter:, headers:)
       @io = io
       @upserter = upserter
+      @headers = headers
       # Keep track of everything we upsert. For any rows we aren't upserting,
       # delete them if they're recurring, or cancel them if they're not recurring.
       # If doing it this way is slow, we could invert this (pull down all IDs and pop from the set).
@@ -346,8 +415,11 @@ The secret to use for signing is:
       # Keep track of the bytes we've read from the file.
       # Never trust Content-Length headers for ical feeds.
       @read_bytes = 0
+      @feed_md5 = Digest::MD5.new
     end
 
+    def feed_hash = @feed_md5.hexdigest
+
     def delete_condition
       return nil if @max_sequence_num_by_uid.empty?
       return @max_sequence_num_by_uid.map do |uid, n|
@@ -486,7 +558,14 @@ The secret to use for signing is:
           # The new UID has the sequence number.
           e["UID"] = {"v" => "#{uid}-#{idx}"}
           e["DTSTART"] = self._ical_entry_from_ruby(occ.start_time, start_entry, is_date)
-
+          if has_end_time
+            if !is_date && end_entry["VALUE"] == "DATE"
+              # It's possible that DTSTART is a time, but DTEND is a date. This makes no sense,
+              # so skip setting an end date. It will be in the :data column at least.
+            else
+              e["DTEND"] = self._ical_entry_from_ruby(occ.end_time, end_entry, is_date)
+            end
+          end
           yield e
           final_sequence = idx
           break if occ.start_time > dont_project_after
@@ -508,7 +587,11 @@ The secret to use for signing is:
       return {"v" => r.strftime("%Y%m%dT%H%M%S"), "TZID" => tzid} if tzid
       value = entry.fetch("v")
       return {"v" => value} if value.end_with?("Z")
-
+      if /^\d{8}T\d{6}$/.match?(value)
+        @upserter.upserting_replicator.logger.warn "ical_assuming_utc_time", ical_entry: entry, ruby_time: r
+        return {"v" => "#{value}Z"}
+      end
+      raise "Cannot create ical entry from: '#{r}', #{entry}"
     end
 
     def _icecube_rule_from_ical(ical)
@@ -551,6 +634,7 @@ The secret to use for signing is:
       in_vevent = false
       while (line = @io.gets)
         @read_bytes += line.size
+        @feed_md5.update(line)
         begin
           line.rstrip!
         rescue Encoding::CompatibilityError
@@ -589,4 +673,41 @@ The secret to use for signing is:
       @upserter.upserting_replicator.logger.warn("invalid_vevent_hash", vevent_uids: bad_event_uids.sort)
     end
   end
+
+  # Return true if the data in the feed has changed from what was last synced,
+  # or false if it has not so the sync can be skipped.
+  # This operation is meant to be resource-light (most of the work is the HTTP request),
+  # so should be done in a threadpool.
+  #
+  # - If we have no previous fetch context, we sync.
+  # - If the fetch errors, sync, because we want the normal error handler to figure it out
+  #   (alert admins, etc).
+  # - If the last fetch's content type and length is different from the current, we sync.
+  # - Download the bytes. If the hash of the bytes is different from what was last processed,
+  #   sync. Since this involves reading the streaming body, we must return a copy of the body (a StringIO).
+  def feed_changed?(row)
+    last_fetch = row.fetch(:last_fetch_context)
+    return true if last_fetch.nil? || last_fetch.empty?
+
+    begin
+      url = self._clean_ics_url(row.fetch(:ics_url))
+      resp = self._make_ics_request(url, last_fetch)
+    rescue Down::NotModified
+      return false
+    rescue StandardError
+      return true
+    end
+    headers = resp.data[:headers] || {}
+    content_type_match = headers["Content-Type"] == last_fetch["content_type"] &&
+      headers["Content-Length"] == last_fetch["content_length"]
+    return true unless content_type_match
+    last_hash = last_fetch["hash"]
+    return true if last_hash.nil?
+
+    hash = Digest::MD5.new
+    while (line = resp.gets)
+      hash.update(line)
+    end
+    return hash.hexdigest != last_hash
+  end
 end
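Taken together, the new last_fetch_context column, _make_ics_request, and the EventProcessor changes give calendar syncs a cheap change-detection path: send If-None-Match with the saved etag, and otherwise compare an MD5 of the streamed body against the saved hash. Below is a minimal standalone sketch of that flow, not the library's actual API: fetch_feed is a hypothetical stand-in for Webhookdb::Http.chunked_download, and the 304 response is modeled as a raised NotModified, mirroring Down::NotModified.

require "digest"
require "stringio"

NotModified = Class.new(StandardError)

# Hypothetical stand-in for Webhookdb::Http.chunked_download: returns a
# streaming body and response headers, raising when the server replies 304.
def fetch_feed(_url, headers)
  raise NotModified if headers["If-None-Match"] == '"v2"'
  [StringIO.new("BEGIN:VCALENDAR\r\nEND:VCALENDAR\r\n"), {"Etag" => '"v2"'}]
end

# Mirrors feed_changed?: no saved context means sync; a 304 means skip;
# otherwise hash the streamed bytes and compare with the saved hash.
def feed_changed?(url, last_fetch)
  return true if last_fetch.nil? || last_fetch.empty?
  headers = {"Accept" => "text/calendar,*/*"}
  headers["If-None-Match"] = last_fetch["etag"] if last_fetch["etag"]
  begin
    io, _resp_headers = fetch_feed(url, headers)
  rescue NotModified
    return false
  end
  md5 = Digest::MD5.new
  while (line = io.gets)
    md5.update(line)
  end
  md5.hexdigest != last_fetch["hash"]
end

p feed_changed?("https://example.org/cal.ics", nil)                # => true
p feed_changed?("https://example.org/cal.ics", {"etag" => '"v2"'}) # => false

Hashing line by line means the body never has to be buffered in full, which matches how EventProcessor folds @feed_md5.update(line) into its existing read loop.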
--- a/data/lib/webhookdb/replicator/icalendar_event_v1.rb
+++ b/data/lib/webhookdb/replicator/icalendar_event_v1.rb
@@ -2,6 +2,7 @@
 
 require "webhookdb/icalendar"
 require "webhookdb/windows_tz"
+require "webhookdb/replicator/base_stale_row_deleter"
 
 class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
   include Appydays::Loggable
@@ -115,7 +116,6 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
       :compound_identity,
       TEXT,
       data_key: "<compound key, see converter>",
-      index: true,
       converter: CONV_REMOTE_KEY,
       optional: true, # This is done via the converter, data_key never exists
     )
@@ -166,6 +166,7 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
     data.delete("calendar_external_id")
     data.delete("recurring_event_id")
     data.delete("recurring_event_sequence")
+    data.delete("row_updated_at")
     return data
   end
 
@@ -215,11 +216,21 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
         columns: [:calendar_external_id, :start_date, :end_date],
         where: Sequel[:status].is_distinct_from("CANCELLED") & (Sequel[:start_date] !~ nil),
       ),
+      Webhookdb::Replicator::IndexSpec.new(
+        columns: [:row_updated_at],
+        where: Sequel[status: "CANCELLED"],
+        identifier: "cancelled_row_updated_at",
+      ),
     ]
   end
 
   def _update_where_expr
-
+    # Compare against data to avoid the constant writes. JSONB != operations are very fast,
+    # so this should not be any real performance issue.
+    # last_modified_at is unreliable because LAST-MODIFIED is unreliable,
+    # even in feeds it is set. There are cases, such as adding an EXDATE to an RRULE,
+    # that do not trigger LAST-MODIFIED changes.
+    return self.qualified_table_sequel_identifier[:data] !~ Sequel[:excluded][:data]
   end
 
   # @param [Array<String>] lines
@@ -369,50 +380,15 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
   # +stale_at+ to +age_cutoff+. This avoids endlessly adding to the icalendar events table
   # due to feeds that change UIDs each fetch- events with changed UIDs will become CANCELLED,
   # and then deleted over time.
-
-
-
-
-
-  # Since this routine should run regularly, we should rarely have events more than 35 or 36 days old,
-  # for example.
-  # Use +nil+ to use no limit (a full table scan) which may be necessary when running this feature
-  # for the first time.
-  # @param chunk_size [Integer] The row delete is done in chunks to avoid long locks.
-  #   The default seems safe, but it's exposed as a parameter if you need to play around with it,
-  #   and can be done via configuration if needed at some point.
-  def delete_stale_cancelled_events(
-    stale_at: Webhookdb::Icalendar.stale_cancelled_event_threshold_days.days.ago,
-    age_cutoff: (Webhookdb::Icalendar.stale_cancelled_event_threshold_days + 10).days.ago,
-    chunk_size: 10_000
-  )
-    # Delete in chunks, like:
-    #   DELETE from "public"."icalendar_event_v1_aaaa"
-    #   WHERE pk IN (
-    #     SELECT pk FROM "public"."icalendar_event_v1_aaaa"
-    #     WHERE row_updated_at < (now() - '35 days'::interval)
-    #     LIMIT 10000
-    #   )
-    age = age_cutoff..stale_at
-    self.admin_dataset do |ds|
-      chunk_ds = ds.where(row_updated_at: age, status: "CANCELLED").select(:pk).limit(chunk_size)
-      loop do
-        # Due to conflicts where a feed is being inserted while the delete is happening,
-        # this may raise an error like:
-        #   deadlock detected
-        #   DETAIL: Process 18352 waits for ShareLock on transaction 435085606; blocked by process 24191.
-        #   Process 24191 waits for ShareLock on transaction 435085589; blocked by process 18352.
-        #   HINT: See server log for query details.
-        #   CONTEXT: while deleting tuple (2119119,3) in relation "icalendar_event_v1_aaaa"
-        # Unit testing this is very difficult though, and in practice it is rare,
-        # and normal Sidekiq job retries should be sufficient to handle this.
-        # So we don't explicitly handle deadlocks, but could if it becomes an issue.
-        deleted = ds.where(pk: chunk_ds).delete
-        break if deleted != chunk_size
-      end
-    end
+  class StaleRowDeleter < Webhookdb::Replicator::BaseStaleRowDeleter
+    def stale_at = Webhookdb::Icalendar.stale_cancelled_event_threshold_days.days
+    def lookback_window = Webhookdb::Icalendar.stale_cancelled_event_lookback_days.days
+    def updated_at_column = :row_updated_at
+    def stale_condition = {status: "CANCELLED"}
   end
 
+  def stale_row_deleter = StaleRowDeleter.new(self)
+
   def calculate_webhook_state_machine
     if (step = self.calculate_dependency_state_machine_step(dependency_help: ""))
       return step
--- /dev/null
+++ b/data/lib/webhookdb/replicator/icalendar_event_v1_partitioned.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+require "webhookdb/replicator/icalendar_event_v1"
+require "webhookdb/replicator/partitionable_mixin"
+
+class Webhookdb::Replicator::IcalendarEventV1Partitioned < Webhookdb::Replicator::IcalendarEventV1
+  include Webhookdb::Replicator::PartitionableMixin
+
+  # @return [Webhookdb::Replicator::Descriptor]
+  def self.descriptor
+    return Webhookdb::Replicator::Descriptor.new(
+      name: "icalendar_event_v1_partitioned",
+      ctor: ->(sint) { self.new(sint) },
+      dependency_descriptor: Webhookdb::Replicator::IcalendarCalendarV1.descriptor,
+      feature_roles: ["partitioning_beta"],
+      resource_name_singular: "iCalendar Event",
+      supports_webhooks: true,
+      description: "Individual events in an icalendar, using partitioned tables rather than one big table. " \
+                   "See icalendar_calendar_v1.",
+      api_docs_url: "https://icalendar.org/",
+    )
+  end
+
+  def _denormalized_columns
+    d = super
+    d << Webhookdb::Replicator::Column.new(:calendar_external_hash, INTEGER, optional: true)
+    return d
+  end
+
+  def partition_method = Webhookdb::DBAdapter::Partitioning::HASH
+  def partition_column_name = :calendar_external_hash
+  def partition_value(resource) = self._str2inthash(resource.fetch("calendar_external_id"))
+end
--- a/data/lib/webhookdb/replicator/intercom_conversation_v1.rb
+++ b/data/lib/webhookdb/replicator/intercom_conversation_v1.rb
@@ -40,6 +40,7 @@ class Webhookdb::Replicator::IntercomConversationV1 < Webhookdb::Replicator::Base
   end
 
   def _mixin_backfill_url = "https://api.intercom.io/conversations"
+  def _mixin_backfill_hashkey = "conversations"
 
   def _resource_and_event(request)
     resource, event = super
--- a/data/lib/webhookdb/replicator/intercom_v1_mixin.rb
+++ b/data/lib/webhookdb/replicator/intercom_v1_mixin.rb
@@ -73,8 +73,9 @@ module Webhookdb::Replicator::IntercomV1Mixin
   end
 
   def _mixin_backfill_url = raise NotImplementedError
+  def _mixin_backfill_hashkey = raise NotImplementedError
 
-  def _fetch_backfill_page(pagination_token,
+  def _fetch_backfill_page(pagination_token, last_backfilled:)
     unless self.auth_credentials?
       raise Webhookdb::Replicator::CredentialsMissing,
             "This integration requires that the Intercom Auth integration has a valid Auth Token"
@@ -123,8 +124,29 @@ module Webhookdb::Replicator::IntercomV1Mixin
       # a TypeError in the backfiller.
       return [], nil
     end
-    data = response.parsed_response.fetch(
+    data = response.parsed_response.fetch(self._mixin_backfill_hashkey)
     starting_after = response.parsed_response.dig("pages", "next", "starting_after")
+    # Intercom pagination sorts by updated_at newest. So if we are doing an incremental sync (last_backfilled set),
+    # and we last backfilled after the latest updated_at, we can stop paginating.
+    if last_backfilled && data.last && data.last["updated_at"]
+      oldest_update = Time.at(data.last["updated_at"])
+      starting_after = nil if oldest_update < last_backfilled
+    end
     return data, starting_after
   end
+
+  def _backfillers
+    return [Backfiller.new(self)]
+  end
+
+  class Backfiller < Webhookdb::Replicator::Base::ServiceBackfiller
+    include Webhookdb::Backfiller::Bulk
+
+    # Upsert for each API call
+    def upsert_page_size = Webhookdb::Intercom.page_size
+    def prepare_body(_body) = nil
+    def upserting_replicator = self.svc
+    # We don't want to override newer items from webhooks, so use conditional upsert.
+    def conditional_upsert? = true
+  end
 end
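The incremental-sync short circuit added to _fetch_backfill_page relies on Intercom returning pages sorted by updated_at, newest first: once the oldest item on a page predates last_backfilled, every later page is older still, so pagination can stop. A self-contained sketch of just that rule (next_page_token is a hypothetical helper name, not part of the mixin):

require "time"

# Returns the pagination token to use next, or nil to stop, mirroring the
# starting_after logic added above.
def next_page_token(page, token, last_backfilled)
  return token unless last_backfilled && page.last && page.last["updated_at"]
  oldest_update = Time.at(page.last["updated_at"])
  oldest_update < last_backfilled ? nil : token
end

last_backfilled = Time.utc(2024, 6, 1)
fresh = [{"updated_at" => Time.utc(2024, 6, 3).to_i}]
stale = [{"updated_at" => Time.utc(2024, 5, 1).to_i}]
p next_page_token(fresh, "tok", last_backfilled) # => "tok" (keep paginating)
p next_page_token(stale, "tok", last_backfilled) # => nil (stop)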
--- /dev/null
+++ b/data/lib/webhookdb/replicator/partitionable_mixin.rb
@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+# Mixin for replicators that support partitioning.
+# Partitioning is currently in beta,
+# with the following limitations/context:
+#
+# - They cannot be created from the CLI.
+#   Because the partitions must be created during the CREATE TABLE call,
+#   the partition_value must be set immediately on creation,
+#   or CREATE TABLE must be deferred.
+#   - CLI support would also require making sure this field isn't edited.
+#     This is an annoying change, so we're putting it off for now.
+#   - Instead, partitioned replicators must be created in the console.
+# - The number of HASH partitions cannot be changed;
+#   there is no good way to handle this in Postgres so we don't bother here.
+# - RANGE partitions are not supported.
+#   We need to support creating the partition when the INSERT fails.
+#   But creating the partitioned table definition itself does work/has a shared behavior at least.
+# - Existing replicators cannot be converted to partitioned.
+#   This is theoretically possible, but it seems easier to just start over
+#   with a new replicator.
+#   - Instead:
+#     - If this is a 'child' replicator, then create a new parent and this child,
+#       then copy over the parent data, either directly (for icalendar)
+#       or using HTTP requests (like with Plaid or Google) where more logic is required.
+#     - Otherwise, it'll depend on the replicator.
+#   - Then to switch clients using the old replicator, to the new replicator, you can:
+#     - Then turn off all workers.
+#     - Rename the new table to the old, and old table to the new.
+#     - Update the service integrations, so the old one points to the new table name and opaque id,
+#       and the new one points to the old table name and opaque id.
+#
+module Webhookdb::Replicator::PartitionableMixin
+  # The partition method, like Webhookdb::DBAdapter::Partitioning::HASH
+  def partition_method = raise NotImplementedError
+  # The partition column name.
+  # Must be present in +_denormalized_columns+.
+  # @return [Symbol]
+  def partition_column_name = raise NotImplementedError
+  # The value for the denormalized column. For HASH partitioning this would be an integer,
+  # for RANGE partitioning this could be a timestamp, etc.
+  # Takes the resource and returns the value.
+  def partition_value(_resource) = raise NotImplementedError
+
+  def partition? = true
+
+  def partitioning
+    return Webhookdb::DBAdapter::Partitioning.new(by: self.partition_method, column: self.partition_column_name)
+  end
+
+  def _prepare_for_insert(resource, event, request, enrichment)
+    h = super
+    h[self.partition_column_name] = self.partition_value(resource)
+    return h
+  end
+
+  def _upsert_conflict_target
+    return [self.partition_column_name, self._remote_key_column.name]
+  end
+
+  # Convert the given string into a stable MD5-derived hash
+  # that can be stored in a (signed, 4 bit) INTEGER column.
+  def _str2inthash(s)
+    # MD5 is 128 bits/16 bytes/32 hex chars (2 chars per byte).
+    # Integers are 32 bits/4 bytes/8 hex chars.
+    # Grab the first 8 chars and convert it to an integer.
+    unsigned_md5int = Digest::MD5.hexdigest(s)[..8].to_i(16)
+    # Then AND it with a 32 bit bitmask to make sure it fits in 32 bits
+    # (though I'm not entirely sure why the above doesn't result in 32 bits always).
+    unsigned_int32 = unsigned_md5int & 0xFFFFFFFF
+    # Convert it from unsigned (0 to 4.2B) to signed (-2.1B to 2.1B) by subtracting 2.1B
+    # (the max 2 byte integer), as opposed to a 4 byte integer which we're dealing with here.
+    signed_md5int = unsigned_int32 - MAX_16BIT_INT
+    return signed_md5int
+  end
+
+  MAX_16BIT_INT = 2**31
+
+  # Return the partitions belonging to the table.
+  # @param db The organization connection.
+  # @return [Array<Webhookdb::DBAdapter::Partition>]
+  def existing_partitions(db)
+    # SELECT inhrelid::regclass AS child
+    # FROM pg_catalog.pg_inherits
+    # WHERE inhparent = 'my_schema.foo'::regclass;
+    parent = self.schema_and_table_symbols.map(&:to_s).join(".")
+    partnames = db[Sequel[:pg_catalog][:pg_inherits]].
+      where(inhparent: Sequel[parent].cast(:regclass)).
+      select_map(Sequel[:inhrelid].cast(:regclass))
+    parent_table = self.dbadapter_table
+    result = partnames.map do |part|
+      suffix = self.partition_suffix(part)
+      Webhookdb::DBAdapter::Partition.new(parent_table:, partition_name: part.to_sym, suffix:)
+    end
+    return result
+  end
+
+  def partition_suffix(partname)
+    return partname[/_[a-zA-Z\d]+$/].to_sym
+  end
+
+  def partition_align_name
+    tblname = self.service_integration.table_name
+    self.service_integration.organization.admin_connection do |db|
+      partitions = self.existing_partitions(db)
+      db.transaction do
+        partitions.each do |partition|
+          next if partition.partition_name.to_s.start_with?(tblname)
+          schema = partition.parent_table.schema.name
+          new_partname = "#{tblname}#{partition.suffix}"
+          db << "ALTER TABLE #{schema}.#{partition.partition_name} RENAME TO #{new_partname}"
+        end
+      end
+    end
+  end
+end
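_str2inthash is what lets HASH partitioning key off a plain INTEGER column: any calendar external id maps to a stable value in Postgres's signed 4-byte range. A standalone copy to illustrate the range guarantee; the MAX_16BIT_INT name is kept from the mixin even though 2**31 is the signed 32-bit bound:

require "digest"

MAX_16BIT_INT = 2**31 # named as in the mixin; it is the signed 32-bit bound

def str2inthash(s)
  # [..8] takes the first nine hex chars of the MD5; the mask then guarantees
  # the value fits in 32 bits before shifting it into the signed range.
  unsigned_md5int = Digest::MD5.hexdigest(s)[..8].to_i(16)
  unsigned_int32 = unsigned_md5int & 0xFFFFFFFF
  unsigned_int32 - MAX_16BIT_INT
end

%w[cal-1 cal-2 cal-3].each do |cid|
  v = str2inthash(cid)
  raise "out of 32-bit range" unless ((-2**31)...(2**31)).cover?(v)
  puts "#{cid} => #{v}"
end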
--- a/data/lib/webhookdb/replicator/signalwire_message_v1.rb
+++ b/data/lib/webhookdb/replicator/signalwire_message_v1.rb
@@ -52,7 +52,7 @@ class Webhookdb::Replicator::SignalwireMessageV1 < Webhookdb::Replicator::Base
       h = u.host.gsub(/\.signalwire\.com$/, "")
       value = h
     end
-    return super
+    return super
   end
 
   def calculate_backfill_state_machine
@@ -200,7 +200,6 @@ Press 'Show' next to the newly-created API token, and copy it.)
       request_url = e.uri.to_s
       request_method = e.http_method
     end
-    self.logger.warn("signalwire_backfill_error", response_body:, response_status:, request_url:)
     message = Webhookdb::Messages::ErrorGenericBackfill.new(
       self.service_integration,
       response_status:,