webhookdb 1.3.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. checksums.yaml +4 -4
  2. data/admin-dist/assets/{index-6aebf805.js → index-9306dd28.js} +39 -39
  3. data/admin-dist/index.html +1 -1
  4. data/data/messages/templates/errors/generic_backfill.email.liquid +30 -0
  5. data/data/messages/templates/errors/icalendar_fetch.email.liquid +8 -2
  6. data/data/messages/templates/specs/with_fields.email.liquid +6 -0
  7. data/db/migrations/026_undo_integration_backfill_cursor.rb +2 -0
  8. data/db/migrations/032_remove_db_defaults.rb +2 -0
  9. data/db/migrations/043_text_search.rb +2 -0
  10. data/db/migrations/045_system_log.rb +15 -0
  11. data/db/migrations/046_indices.rb +14 -0
  12. data/db/migrations/047_sync_parallelism.rb +9 -0
  13. data/db/migrations/048_sync_stats.rb +9 -0
  14. data/db/migrations/049_error_handlers.rb +18 -0
  15. data/db/migrations/050_logged_webhook_indices.rb +25 -0
  16. data/db/migrations/051_partitioning.rb +9 -0
  17. data/integration/async_spec.rb +0 -2
  18. data/integration/service_integrations_spec.rb +0 -2
  19. data/lib/amigo/durable_job.rb +2 -2
  20. data/lib/amigo/job_in_context.rb +12 -0
  21. data/lib/webhookdb/admin.rb +6 -0
  22. data/lib/webhookdb/admin_api/data_provider.rb +1 -0
  23. data/lib/webhookdb/admin_api/entities.rb +8 -0
  24. data/lib/webhookdb/aggregate_result.rb +1 -1
  25. data/lib/webhookdb/api/entities.rb +6 -2
  26. data/lib/webhookdb/api/error_handlers.rb +104 -0
  27. data/lib/webhookdb/api/helpers.rb +25 -1
  28. data/lib/webhookdb/api/icalproxy.rb +22 -0
  29. data/lib/webhookdb/api/install.rb +2 -1
  30. data/lib/webhookdb/api/organizations.rb +6 -0
  31. data/lib/webhookdb/api/saved_queries.rb +1 -0
  32. data/lib/webhookdb/api/saved_views.rb +1 -0
  33. data/lib/webhookdb/api/service_integrations.rb +2 -1
  34. data/lib/webhookdb/api/sync_targets.rb +1 -1
  35. data/lib/webhookdb/api/system.rb +5 -0
  36. data/lib/webhookdb/api/webhook_subscriptions.rb +1 -0
  37. data/lib/webhookdb/api.rb +4 -1
  38. data/lib/webhookdb/apps.rb +4 -0
  39. data/lib/webhookdb/async/autoscaler.rb +10 -0
  40. data/lib/webhookdb/async/job.rb +4 -0
  41. data/lib/webhookdb/async/scheduled_job.rb +4 -0
  42. data/lib/webhookdb/async.rb +2 -0
  43. data/lib/webhookdb/backfiller.rb +17 -4
  44. data/lib/webhookdb/concurrent.rb +96 -0
  45. data/lib/webhookdb/connection_cache.rb +57 -10
  46. data/lib/webhookdb/console.rb +1 -1
  47. data/lib/webhookdb/customer/reset_code.rb +1 -1
  48. data/lib/webhookdb/customer.rb +5 -4
  49. data/lib/webhookdb/database_document.rb +1 -1
  50. data/lib/webhookdb/db_adapter/default_sql.rb +1 -14
  51. data/lib/webhookdb/db_adapter/partition.rb +14 -0
  52. data/lib/webhookdb/db_adapter/partitioning.rb +8 -0
  53. data/lib/webhookdb/db_adapter/pg.rb +77 -5
  54. data/lib/webhookdb/db_adapter/snowflake.rb +15 -6
  55. data/lib/webhookdb/db_adapter.rb +25 -3
  56. data/lib/webhookdb/dbutil.rb +2 -0
  57. data/lib/webhookdb/errors.rb +34 -0
  58. data/lib/webhookdb/fixtures/logged_webhooks.rb +4 -0
  59. data/lib/webhookdb/fixtures/organization_error_handlers.rb +20 -0
  60. data/lib/webhookdb/http.rb +30 -16
  61. data/lib/webhookdb/icalendar.rb +30 -9
  62. data/lib/webhookdb/jobs/amigo_test_jobs.rb +1 -1
  63. data/lib/webhookdb/jobs/backfill.rb +21 -25
  64. data/lib/webhookdb/jobs/create_mirror_table.rb +3 -4
  65. data/lib/webhookdb/jobs/deprecated_jobs.rb +3 -0
  66. data/lib/webhookdb/jobs/emailer.rb +2 -1
  67. data/lib/webhookdb/jobs/front_signalwire_message_channel_sync_inbound.rb +15 -0
  68. data/lib/webhookdb/jobs/icalendar_delete_stale_cancelled_events.rb +7 -2
  69. data/lib/webhookdb/jobs/icalendar_enqueue_syncs.rb +74 -11
  70. data/lib/webhookdb/jobs/icalendar_enqueue_syncs_for_urls.rb +22 -0
  71. data/lib/webhookdb/jobs/icalendar_sync.rb +21 -9
  72. data/lib/webhookdb/jobs/increase_event_handler.rb +3 -2
  73. data/lib/webhookdb/jobs/{logged_webhook_replay.rb → logged_webhooks_replay.rb} +5 -3
  74. data/lib/webhookdb/jobs/message_dispatched.rb +1 -0
  75. data/lib/webhookdb/jobs/model_event_system_log_tracker.rb +112 -0
  76. data/lib/webhookdb/jobs/monitor_metrics.rb +29 -0
  77. data/lib/webhookdb/jobs/organization_database_migration_notify.rb +32 -0
  78. data/lib/webhookdb/jobs/organization_database_migration_run.rb +4 -6
  79. data/lib/webhookdb/jobs/organization_error_handler_dispatch.rb +26 -0
  80. data/lib/webhookdb/jobs/prepare_database_connections.rb +1 -0
  81. data/lib/webhookdb/jobs/process_webhook.rb +11 -12
  82. data/lib/webhookdb/jobs/renew_watch_channel.rb +10 -10
  83. data/lib/webhookdb/jobs/replication_migration.rb +5 -2
  84. data/lib/webhookdb/jobs/reset_code_create_dispatch.rb +1 -2
  85. data/lib/webhookdb/jobs/scheduled_backfills.rb +2 -2
  86. data/lib/webhookdb/jobs/send_invite.rb +3 -2
  87. data/lib/webhookdb/jobs/send_test_webhook.rb +1 -3
  88. data/lib/webhookdb/jobs/send_webhook.rb +4 -5
  89. data/lib/webhookdb/jobs/stale_row_deleter.rb +31 -0
  90. data/lib/webhookdb/jobs/sync_target_enqueue_scheduled.rb +3 -0
  91. data/lib/webhookdb/jobs/sync_target_run_sync.rb +9 -15
  92. data/lib/webhookdb/jobs/{webhook_subscription_delivery_attempt.rb → webhook_subscription_delivery_event.rb} +5 -8
  93. data/lib/webhookdb/liquid/expose.rb +1 -1
  94. data/lib/webhookdb/liquid/filters.rb +1 -1
  95. data/lib/webhookdb/liquid/partial.rb +2 -2
  96. data/lib/webhookdb/logged_webhook/resilient.rb +3 -3
  97. data/lib/webhookdb/logged_webhook.rb +16 -2
  98. data/lib/webhookdb/message/email_transport.rb +1 -1
  99. data/lib/webhookdb/message/transport.rb +1 -1
  100. data/lib/webhookdb/message.rb +55 -4
  101. data/lib/webhookdb/messages/error_generic_backfill.rb +47 -0
  102. data/lib/webhookdb/messages/error_icalendar_fetch.rb +5 -0
  103. data/lib/webhookdb/messages/error_signalwire_send_sms.rb +2 -0
  104. data/lib/webhookdb/messages/specs.rb +16 -0
  105. data/lib/webhookdb/organization/alerting.rb +56 -6
  106. data/lib/webhookdb/organization/database_migration.rb +2 -2
  107. data/lib/webhookdb/organization/db_builder.rb +5 -4
  108. data/lib/webhookdb/organization/error_handler.rb +141 -0
  109. data/lib/webhookdb/organization.rb +76 -10
  110. data/lib/webhookdb/postgres/model.rb +1 -0
  111. data/lib/webhookdb/postgres/model_utilities.rb +2 -0
  112. data/lib/webhookdb/postgres.rb +3 -4
  113. data/lib/webhookdb/replicator/base.rb +202 -68
  114. data/lib/webhookdb/replicator/base_stale_row_deleter.rb +165 -0
  115. data/lib/webhookdb/replicator/column.rb +2 -0
  116. data/lib/webhookdb/replicator/email_octopus_contact_v1.rb +0 -1
  117. data/lib/webhookdb/replicator/fake.rb +106 -88
  118. data/lib/webhookdb/replicator/front_signalwire_message_channel_app_v1.rb +131 -61
  119. data/lib/webhookdb/replicator/github_repo_v1_mixin.rb +17 -0
  120. data/lib/webhookdb/replicator/icalendar_calendar_v1.rb +197 -32
  121. data/lib/webhookdb/replicator/icalendar_event_v1.rb +20 -44
  122. data/lib/webhookdb/replicator/icalendar_event_v1_partitioned.rb +33 -0
  123. data/lib/webhookdb/replicator/intercom_contact_v1.rb +1 -0
  124. data/lib/webhookdb/replicator/intercom_conversation_v1.rb +1 -0
  125. data/lib/webhookdb/replicator/intercom_v1_mixin.rb +49 -6
  126. data/lib/webhookdb/replicator/partitionable_mixin.rb +116 -0
  127. data/lib/webhookdb/replicator/shopify_v1_mixin.rb +1 -1
  128. data/lib/webhookdb/replicator/signalwire_message_v1.rb +31 -1
  129. data/lib/webhookdb/replicator/sponsy_v1_mixin.rb +1 -1
  130. data/lib/webhookdb/replicator/transistor_episode_stats_v1.rb +0 -1
  131. data/lib/webhookdb/replicator/transistor_episode_v1.rb +11 -5
  132. data/lib/webhookdb/replicator/webhook_request.rb +8 -0
  133. data/lib/webhookdb/replicator.rb +6 -3
  134. data/lib/webhookdb/service/helpers.rb +4 -0
  135. data/lib/webhookdb/service/middleware.rb +6 -2
  136. data/lib/webhookdb/service/view_api.rb +1 -1
  137. data/lib/webhookdb/service.rb +10 -10
  138. data/lib/webhookdb/service_integration.rb +19 -1
  139. data/lib/webhookdb/signalwire.rb +1 -1
  140. data/lib/webhookdb/spec_helpers/async.rb +0 -4
  141. data/lib/webhookdb/spec_helpers/sentry.rb +32 -0
  142. data/lib/webhookdb/spec_helpers/shared_examples_for_replicators.rb +239 -64
  143. data/lib/webhookdb/spec_helpers.rb +1 -0
  144. data/lib/webhookdb/sync_target.rb +202 -34
  145. data/lib/webhookdb/system_log_event.rb +9 -0
  146. data/lib/webhookdb/tasks/admin.rb +1 -1
  147. data/lib/webhookdb/tasks/annotate.rb +1 -1
  148. data/lib/webhookdb/tasks/db.rb +13 -1
  149. data/lib/webhookdb/tasks/docs.rb +1 -1
  150. data/lib/webhookdb/tasks/fixture.rb +1 -1
  151. data/lib/webhookdb/tasks/message.rb +1 -1
  152. data/lib/webhookdb/tasks/regress.rb +1 -1
  153. data/lib/webhookdb/tasks/release.rb +1 -1
  154. data/lib/webhookdb/tasks/sidekiq.rb +1 -1
  155. data/lib/webhookdb/tasks/specs.rb +1 -1
  156. data/lib/webhookdb/version.rb +1 -1
  157. data/lib/webhookdb/webhook_subscription.rb +3 -4
  158. data/lib/webhookdb.rb +34 -8
  159. metadata +114 -64
  160. data/lib/webhookdb/jobs/customer_created_notify_internal.rb +0 -22
  161. data/lib/webhookdb/jobs/organization_database_migration_notify_finished.rb +0 -21
  162. data/lib/webhookdb/jobs/organization_database_migration_notify_started.rb +0 -21
  163. /data/lib/webhookdb/jobs/{logged_webhook_resilient_replay.rb → logged_webhooks_resilient_replay.rb} +0 -0
  164. /data/lib/webhookdb/jobs/{webhook_resource_notify_integrations.rb → webhookdb_resource_notify_integrations.rb} +0 -0
data/lib/webhookdb/replicator/icalendar_calendar_v1.rb
@@ -74,6 +74,10 @@ The secret to use for signing is:
       col.new(:row_updated_at, TIMESTAMP, index: true, optional: true, defaulter: :now),
       col.new(:last_synced_at, TIMESTAMP, index: true, optional: true),
       col.new(:ics_url, TEXT, converter: col.converter_gsub("^webcal", "https")),
+      col.new(:event_count, INTEGER, optional: true),
+      col.new(:feed_bytes, INTEGER, optional: true),
+      col.new(:last_sync_duration_ms, INTEGER, optional: true),
+      col.new(:last_fetch_context, OBJECT, optional: true),
     ]
   end
 
@@ -108,7 +112,7 @@ The secret to use for signing is:
     external_id = request.body.fetch("external_id")
     case request_type
       when "SYNC"
-        super(request)
+        super
         Webhookdb::Jobs::IcalendarSync.perform_async(self.service_integration.id, external_id)
         return
       when "DELETE"
@@ -118,14 +122,12 @@ The secret to use for signing is:
         unless Webhookdb::RACK_ENV == "test"
           raise "someone tried to use the special unit test google event type outside of unit tests"
         end
-        return super(request)
+        return super
       else
         raise ArgumentError, "Unknown request type: #{request_type}"
     end
   end
 
-  CLEANUP_SERVICE_NAMES = ["icalendar_event_v1"].freeze
-
   def rows_needing_sync(dataset, now: Time.now)
     cutoff = now - Webhookdb::Icalendar.sync_period_hours.hours
     return dataset.where(Sequel[last_synced_at: nil] | Sequel.expr { last_synced_at < cutoff })
@@ -133,7 +135,7 @@ The secret to use for signing is:
 
   def delete_data_for_external_id(external_id)
     relevant_integrations = self.service_integration.recursive_dependents.
-      filter { |d| CLEANUP_SERVICE_NAMES.include?(d.service_name) }
+      filter { |d| Webhookdb::Icalendar::EVENT_REPLICATORS.include?(d.service_name) }
     self.admin_dataset do |ds|
       ds.db.transaction do
         ds.where(external_id:).delete
@@ -154,7 +156,7 @@ The secret to use for signing is:
       @now = now
     end
 
-    def upsert_page_size = 500
+    def upsert_page_size = 2000
     def conditional_upsert? = true
 
     def prepare_body(body)
@@ -163,14 +165,50 @@ The secret to use for signing is:
     end
   end
 
-  def sync_row(row)
+  def sync_row(row, force: false, now: Time.now)
     Appydays::Loggable.with_log_tags(icalendar_url: row.fetch(:ics_url)) do
+      last_synced_at = row.fetch(:last_synced_at)
+      should_sync = force ||
+        last_synced_at.nil? ||
+        # If a proxy is configured, we always want to try to sync,
+        # since this could have come from a webhook, but also the proxy feed refresh TTL
+        # is likely much lower than ICALENDAR_SYNC_PERIOD_HOURS so it's good to check on it.
+        # The check is very fast (should 304) so is safe to do relatively often.
+        Webhookdb::Icalendar.proxy_url.present? ||
+        last_synced_at < (now - Webhookdb::Icalendar.sync_period_hours.hours)
+      unless should_sync
+        self.logger.info("skip_sync_recently_synced", last_synced_at:)
+        return
+      end
       self.with_advisory_lock(row.fetch(:pk)) do
-        now = Time.now
-        if (dep = self.find_dependent("icalendar_event_v1"))
-          self._sync_row(row, dep, now:)
+        start = Time.now
+        if (dep = self.find_dependent(Webhookdb::Icalendar::EVENT_REPLICATORS))
+          if dep.replicator.avoid_writes?
+            # Check if this table is being vacuumed/etc. We use this instead of a semaphore job,
+            # since it's a better fit for icalendar, which is pre-scheduled, rather than reactive.
+            # That is, when we receive webhooks, a semaphore job gives us a more predictable rate;
+            # but icalendar rate is negotiated in advance (when enqueuing jobs),
+            # and we can be more 'helpful' to something like a vacuum by not running any jobs at all.
+            self.logger.info("skip_sync_table_locked")
+            raise Amigo::Retry::Retry, 60.seconds + (rand * 10.seconds)
+          end
+          processor = self._sync_row(row, dep, now:)
+        end
+        self.admin_dataset do |ds|
+          ds.where(pk: row.fetch(:pk)).
+            update(
+              last_synced_at: now,
+              event_count: processor&.upserted_identities&.count,
+              feed_bytes: processor&.read_bytes,
+              last_sync_duration_ms: (Time.now - start).in_milliseconds,
+              last_fetch_context: {
+                "hash" => processor&.feed_hash,
+                "content_type" => processor&.headers&.fetch("Content-Type", nil),
+                "content_length" => processor&.headers&.fetch("Content-Length", nil),
+                "etag" => processor&.headers&.fetch("Etag", nil),
+              }.to_json,
+            )
         end
-        self.admin_dataset { |ds| ds.where(pk: row.fetch(:pk)).update(last_synced_at: now) }
       end
     end
   end
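
A note on the locking above: with_advisory_lock is defined on the replicator base and is not shown in this diff. A minimal sketch of the general technique, using Sequel and a Postgres transaction-scoped advisory lock (the helper name and connection setup are illustrative, not WebhookDB's actual implementation):

    require "sequel"

    DB = Sequel.connect(ENV.fetch("DATABASE_URL"))

    # Run a block while holding a Postgres advisory lock keyed by an integer
    # (such as a row's pk). A second worker calling this with the same key
    # blocks until the first transaction commits or rolls back.
    def with_pg_advisory_lock(db, key)
      db.transaction do
        db.get(Sequel.function(:pg_advisory_xact_lock, key)) # lock releases at transaction end
        yield
      end
    end

    with_pg_advisory_lock(DB, 12_345) do
      puts "only one worker at a time runs this for key 12345"
    end
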
@@ -179,14 +217,19 @@ The secret to use for signing is:
     calendar_external_id = row.fetch(:external_id)
     begin
       request_url = self._clean_ics_url(row.fetch(:ics_url))
-      io = Webhookdb::Http.chunked_download(request_url, rewindable: false)
-    rescue Down::Error, URI::InvalidURIError => e
+      io = self._make_ics_request(request_url, row.fetch(:last_fetch_context))
+    rescue Down::Error,
+        URI::InvalidURIError,
+        HTTPX::NativeResolveError,
+        HTTPX::InsecureRedirectError,
+        HTTPX::Connection::HTTP2::Error,
+        EOFError => e
       self._handle_down_error(e, request_url:, calendar_external_id:)
       return
     end
 
     upserter = Upserter.new(dep.replicator, calendar_external_id, now:)
-    processor = EventProcessor.new(io, upserter)
+    processor = EventProcessor.new(io:, upserter:, headers: io.data[:headers])
     processor.process
     # Delete all the extra replicator rows, and cancel all the rows that weren't upserted.
     dep.replicator.admin_dataset do |ds|
@@ -204,6 +247,25 @@ The secret to use for signing is:
         row_updated_at: now,
       )
     end
+    return processor
+  end
+
+  def _make_ics_request(request_url, last_fetch_context)
+    # Some servers require a VERY explicit accept header,
+    # so tell them we prefer icalendar here.
+    # Using Httpx, Accept-Encoding is gzip,deflate
+    # which seems fine (server should use identity as worst case).
+    headers = {
+      "Accept" => "text/calendar,*/*",
+    }
+    headers["If-None-Match"] = last_fetch_context["etag"] if last_fetch_context&.[]("etag")
+    if (proxy_url = Webhookdb::Icalendar.proxy_url).present?
+      request_url = "#{proxy_url.delete_suffix('/')}/?url=#{URI.encode_www_form_component(request_url)}"
+      headers["Authorization"] = "Apikey #{Webhookdb::Icalendar.proxy_api_key}" if
+        Webhookdb::Icalendar.proxy_api_key.present?
+    end
+    resp = Webhookdb::Http.chunked_download(request_url, rewindable: false, headers:)
+    return resp
   end
 
   # We get all sorts of strange urls, fix up what we can.
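
_make_ics_request turns the stored last_fetch_context into an HTTP conditional request: the saved ETag goes out as If-None-Match, and an unchanged feed can answer 304 Not Modified instead of shipping the whole body. A standalone sketch of that round trip using Ruby's stdlib Net::HTTP (the replicator itself goes through Webhookdb::Http and HTTPX; the feed URL is hypothetical):

    require "net/http"
    require "uri"

    uri = URI("https://example.org/feed.ics") # hypothetical feed URL
    etag = nil
    2.times do
      req = Net::HTTP::Get.new(uri)
      req["Accept"] = "text/calendar,*/*"
      req["If-None-Match"] = etag if etag
      resp = Net::HTTP.start(uri.host, uri.port, use_ssl: true) { |http| http.request(req) }
      if resp.is_a?(Net::HTTPNotModified)
        puts "304: feed unchanged, skip this sync"
      else
        etag = resp["Etag"] # save for the next poll's conditional request
        puts "200: process #{resp.body.bytesize} bytes"
      end
    end
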
@@ -224,13 +286,31 @@ The secret to use for signing is:
       self.logger.info("icalendar_fetch_not_modified", response_status: 304, request_url:, calendar_external_id:)
       return
     when Down::SSLError
-      self._handle_retryable_down_error!(e, request_url:, calendar_external_id:)
-    when Down::TimeoutError, Down::ConnectionError, Down::InvalidUrl, URI::InvalidURIError
+      # Most SSL errors are transient and can be retried, but some are due to a long-term misconfiguration.
+      # Handle these with an alert, like if we had a 404, which indicates a longer-term issue.
+      is_fatal =
+        # There doesn't appear to be a way to allow unsafe legacy content negotiation on a per-request basis,
+        # it is compiled into OpenSSL (may be wrong about this).
+        e.to_s.include?("unsafe legacy renegotiation disabled") ||
+        # Certificate failures are not transient
+        e.to_s.include?("certificate verify failed")
+      if is_fatal
+        response_status = 0
+        response_body = e.to_s
+      else
+        self._handle_retryable_down_error!(e, request_url:, calendar_external_id:)
+      end
+    when Down::TimeoutError, Down::ConnectionError, Down::InvalidUrl,
+        Errno::ECONNRESET,
+        URI::InvalidURIError,
+        HTTPX::NativeResolveError, HTTPX::InsecureRedirectError,
+        HTTPX::Connection::HTTP2::Error,
+        EOFError
       response_status = 0
       response_body = e.to_s
     when Down::ClientError
       raise e if e.response.nil?
-      response_status = e.response.code.to_i
+      response_status = e.response.status.to_i
       self._handle_retryable_down_error!(e, request_url:, calendar_external_id:) if
         self._retryable_client_error?(e, request_url:)
       # These are all the errors we've seen, we can't do anything about.
@@ -242,25 +322,32 @@ The secret to use for signing is:
         404, 405, # Fundamental issues with the URL given
         409, 410, # More access problems
         417, # If someone uses an Outlook HTML calendar, fetch gives us a 417
+        422, # Sometimes used instead of 404
         429, # Usually 429s are retried (as above), but in some cases they're not.
+        500, 503, 504, # Intermittent server issues, usually
+        599, # Represents a timeout in icalproxy
       ]
       # For most client errors, we can't do anything about it. For example,
       # an 'unshared' URL could result in a 401, 403, 404, or even a 405.
       # For now, other client errors, we can raise on,
       # in case it's something we can fix/work around.
       # For example, it's possible something like a 415 is a WebhookDB issue.
+      if response_status == 421 && (origin_err = e.response.headers["Ical-Proxy-Origin-Error"])
+        response_status = origin_err.to_i
+      end
       raise e unless expected_errors.include?(response_status)
-      response_body = e.response.body.to_s
+      response_body = self._safe_read_body(e)
     when Down::ServerError
-      response_status = e.response.code.to_i
-      response_body = e.response.body.to_s
+      response_status = e.response.status.to_i
+      response_body = self._safe_read_body(e)
     else
       response_body = nil
       response_status = nil
     end
     raise e if response_status.nil?
+    loggable_body = response_body && response_body[..256]
     self.logger.warn("icalendar_fetch_error",
-      response_body:, response_status:, request_url:, calendar_external_id:,)
+      response_body: loggable_body, response_status:, request_url:, calendar_external_id:,)
     message = Webhookdb::Messages::ErrorIcalendarFetch.new(
       self.service_integration,
       calendar_external_id,
@@ -269,11 +356,19 @@ The secret to use for signing is:
       request_url:,
       request_method: "GET",
     )
-    self.service_integration.organization.alerting.dispatch_alert(message)
+    self.service_integration.organization.alerting.dispatch_alert(message, separate_connection: false)
+  end
+
+  # We can hit an error while reading the error body, since it was opened as a stream.
+  # Ignore those errors.
+  def _safe_read_body(e)
+    return e.response.body.to_s
+  rescue OpenSSL::SSL::SSLError, HTTPX::Error
+    return "<error reading body>"
   end
 
   def _retryable_client_error?(e, request_url:)
-    code = e.response.code.to_i
+    code = e.response.status.to_i
     # This is a bad domain that returns 429 for most requests.
     # Tell the org admins it won't sync.
     return false if code == 429 && request_url.start_with?("https://ical.schedulestar.com")
@@ -290,7 +385,7 @@ The secret to use for signing is:
     retry_in = rand(4..60).minutes
     self.logger.debug(
       "icalendar_fetch_error_retry",
-      response_status: e.respond_to?(:response) ? e.response&.code : 0,
+      response_status: e.respond_to?(:response) ? e.response&.status : 0,
       request_url:,
       calendar_external_id:,
       retry_at: Time.now + retry_in,
@@ -299,11 +394,12 @@ The secret to use for signing is:
   end
 
   class EventProcessor
-    attr_reader :upserted_identities
+    attr_reader :upserted_identities, :read_bytes, :headers
 
-    def initialize(io, upserter)
+    def initialize(io:, upserter:, headers:)
       @io = io
       @upserter = upserter
+      @headers = headers
       # Keep track of everything we upsert. For any rows we aren't upserting,
       # delete them if they're recurring, or cancel them if they're not recurring.
       # If doing it this way is slow, we could invert this (pull down all IDs and pop from the set).
@@ -316,8 +412,14 @@ The secret to use for signing is:
       # We need to keep track of how many events each UID spawns,
       # so we can delete any with a higher count.
       @max_sequence_num_by_uid = {}
+      # Keep track of the bytes we've read from the file.
+      # Never trust Content-Length headers for ical feeds.
+      @read_bytes = 0
+      @feed_md5 = Digest::MD5.new
     end
 
+    def feed_hash = @feed_md5.hexdigest
+
     def delete_condition
       return nil if @max_sequence_num_by_uid.empty?
       return @max_sequence_num_by_uid.map do |uid, n|
@@ -456,7 +558,14 @@ The secret to use for signing is:
         # The new UID has the sequence number.
         e["UID"] = {"v" => "#{uid}-#{idx}"}
         e["DTSTART"] = self._ical_entry_from_ruby(occ.start_time, start_entry, is_date)
-        e["DTEND"] = self._ical_entry_from_ruby(occ.end_time, end_entry, is_date) if has_end_time
+        if has_end_time
+          if !is_date && end_entry["VALUE"] == "DATE"
+            # It's possible that DTSTART is a time, but DTEND is a date. This makes no sense,
+            # so skip setting an end date. It will be in the :data column at least.
+          else
+            e["DTEND"] = self._ical_entry_from_ruby(occ.end_time, end_entry, is_date)
+          end
+        end
         yield e
         final_sequence = idx
         break if occ.start_time > dont_project_after
@@ -474,7 +583,15 @@ The secret to use for signing is:
     def _ical_entry_from_ruby(r, entry, is_date)
       return {"v" => r.strftime("%Y%m%d")} if is_date
       return {"v" => r.strftime("%Y%m%dT%H%M%SZ")} if r.zone == "UTC"
-      return {"v" => r.strftime("%Y%m%dT%H%M%S"), "TZID" => entry.fetch("TZID")}
+      tzid = entry["TZID"]
+      return {"v" => r.strftime("%Y%m%dT%H%M%S"), "TZID" => tzid} if tzid
+      value = entry.fetch("v")
+      return {"v" => value} if value.end_with?("Z")
+      if /^\d{8}T\d{6}$/.match?(value)
+        @upserter.upserting_replicator.logger.warn "ical_assuming_utc_time", ical_entry: entry, ruby_time: r
+        return {"v" => "#{value}Z"}
+      end
+      raise "Cannot create ical entry from: '#{r}', #{entry}"
     end
 
     def _icecube_rule_from_ical(ical)
@@ -483,11 +600,20 @@ The secret to use for signing is:
       # IceCube errors, because `day_of_month` isn't valid on a WeeklyRule.
       # In this case, we need to sanitize the string to remove the offending rule piece.
       # There are probably many other offending formats, but we'll add them here as needed.
+      unambiguous_ical = nil
       if ical.include?("FREQ=WEEKLY") && ical.include?("BYMONTHDAY=")
-        ical = ical.gsub(/BYMONTHDAY=[\d,]+/, "")
-        ical.delete_prefix! ";"
-        ical.delete_suffix! ";"
-        ical.squeeze!(";")
+        unambiguous_ical = ical.gsub(/BYMONTHDAY=[\d,]+/, "")
+      elsif ical.include?("FREQ=MONTHLY") && ical.include?("BYYEARDAY=") && ical.include?("BYMONTHDAY=")
+        # Another rule: FREQ=MONTHLY;INTERVAL=3;BYYEARDAY=14;BYMONTHDAY=14
+        # Apple interprets this as monthly on the 14th; rrule.js interprets this as never happening.
+        # 'day_of_year' isn't valid on a MonthlyRule, so delete the BYYEARDAY component.
+        unambiguous_ical = ical.gsub(/BYYEARDAY=[\d,]+/, "")
+      end
+      if unambiguous_ical
+        unambiguous_ical.delete_prefix! ";"
+        unambiguous_ical.delete_suffix! ";"
+        unambiguous_ical.squeeze!(";")
+        ical = unambiguous_ical
       end
       return IceCube::IcalParser.rule_from_ical(ical)
     end
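
To make the sanitization concrete, here is the MONTHLY/BYYEARDAY case from the comment above run through the same cleanup steps; the contradictory BYYEARDAY part is dropped and the leftover double semicolon is squeezed away:

    ical = "FREQ=MONTHLY;INTERVAL=3;BYYEARDAY=14;BYMONTHDAY=14"
    unambiguous_ical = ical.gsub(/BYYEARDAY=[\d,]+/, "") # => "FREQ=MONTHLY;INTERVAL=3;;BYMONTHDAY=14"
    unambiguous_ical.delete_prefix! ";" # no-op here, guards a leading ";"
    unambiguous_ical.delete_suffix! ";" # no-op here, guards a trailing ";"
    unambiguous_ical.squeeze!(";")      # collapse ";;" left by the gsub
    puts unambiguous_ical # => "FREQ=MONTHLY;INTERVAL=3;BYMONTHDAY=14"
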
@@ -507,6 +633,8 @@ The secret to use for signing is:
       vevent_lines = []
       in_vevent = false
       while (line = @io.gets)
+        @read_bytes += line.size
+        @feed_md5.update(line)
         begin
           line.rstrip!
         rescue Encoding::CompatibilityError
@@ -545,4 +673,41 @@ The secret to use for signing is:
       @upserter.upserting_replicator.logger.warn("invalid_vevent_hash", vevent_uids: bad_event_uids.sort)
     end
   end
+
+  # Return true if the data in the feed has changed from what was last synced,
+  # or false if it has not so the sync can be skipped.
+  # This operation is meant to be resource-light (most of the work is the HTTP request),
+  # so should be done in a threadpool.
+  #
+  # - If we have no previous fetch context, we sync.
+  # - If the fetch errors, sync, because we want the normal error handler to figure it out
+  #   (alert admins, etc).
+  # - If the last fetch's content type and length is different from the current, we sync.
+  # - Download the bytes. If the hash of the bytes is different from what was last processed,
+  #   sync. Since this involves reading the streaming body, we must return a copy of the body (a StringIO).
+  def feed_changed?(row)
+    last_fetch = row.fetch(:last_fetch_context)
+    return true if last_fetch.nil? || last_fetch.empty?
+
+    begin
+      url = self._clean_ics_url(row.fetch(:ics_url))
+      resp = self._make_ics_request(url, last_fetch)
+    rescue Down::NotModified
+      return false
+    rescue StandardError
+      return true
+    end
+    headers = resp.data[:headers] || {}
+    content_type_match = headers["Content-Type"] == last_fetch["content_type"] &&
+      headers["Content-Length"] == last_fetch["content_length"]
+    return true unless content_type_match
+    last_hash = last_fetch["hash"]
+    return true if last_hash.nil?
+
+    hash = Digest::MD5.new
+    while (line = resp.gets)
+      hash.update(line)
+    end
+    return hash.hexdigest != last_hash
+  end
 end
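
As the comment on feed_changed? notes, the check is cheap enough to fan out across many calendars before deciding which ones get a full sync job. A sketch of that fan-out with a plain Ruby thread pool; candidate_rows, replicator, and enqueue_sync are hypothetical stand-ins (the real scheduling lives in the icalendar_enqueue_syncs job and the new Webhookdb::Concurrent helper):

    queue = Queue.new
    candidate_rows.each { |row| queue << row } # rows have :pk, :ics_url, :last_fetch_context
    workers = Array.new(8) do
      Thread.new do
        loop do
          begin
            row = queue.pop(true) # non-blocking; raises ThreadError when the queue is empty
          rescue ThreadError
            break
          end
          # Only enqueue a full (expensive) sync when the feed bytes actually changed.
          enqueue_sync(row[:pk]) if replicator.feed_changed?(row)
        end
      end
    end
    workers.each(&:join)
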
data/lib/webhookdb/replicator/icalendar_event_v1.rb
@@ -2,6 +2,7 @@
 
 require "webhookdb/icalendar"
 require "webhookdb/windows_tz"
+require "webhookdb/replicator/base_stale_row_deleter"
 
 class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
   include Appydays::Loggable
@@ -115,7 +116,6 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
       :compound_identity,
       TEXT,
       data_key: "<compound key, see converter>",
-      index: true,
       converter: CONV_REMOTE_KEY,
       optional: true, # This is done via the converter, data_key never exists
     )
@@ -166,6 +166,7 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
     data.delete("calendar_external_id")
     data.delete("recurring_event_id")
     data.delete("recurring_event_sequence")
+    data.delete("row_updated_at")
     return data
   end
 
@@ -215,11 +216,21 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
         columns: [:calendar_external_id, :start_date, :end_date],
         where: Sequel[:status].is_distinct_from("CANCELLED") & (Sequel[:start_date] !~ nil),
       ),
+      Webhookdb::Replicator::IndexSpec.new(
+        columns: [:row_updated_at],
+        where: Sequel[status: "CANCELLED"],
+        identifier: "cancelled_row_updated_at",
+      ),
     ]
   end
 
   def _update_where_expr
-    return self.qualified_table_sequel_identifier[:last_modified_at] < Sequel[:excluded][:last_modified_at]
+    # Compare against data to avoid the constant writes. JSONB != operations are very fast,
+    # so this should not be any real performance issue.
+    # last_modified_at is unreliable because LAST-MODIFIED is unreliable,
+    # even in feeds where it is set. There are cases, such as adding an EXDATE to an RRULE,
+    # that do not trigger LAST-MODIFIED changes.
+    return self.qualified_table_sequel_identifier[:data] !~ Sequel[:excluded][:data]
  end
 
   # @param [Array<String>] lines
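
For context on _update_where_expr: it becomes the WHERE clause of the Postgres ON CONFLICT ... DO UPDATE, so rows whose data is byte-identical are skipped without a write. A rough Sequel sketch of the statement shape, using an abbreviated table and column set (illustrative, not the exact SQL the replicator generates):

    require "sequel"

    DB = Sequel.connect(ENV.fetch("DATABASE_URL"))
    DB.extension :pg_json

    DB[:icalendar_event_v1_fixture].insert_conflict(
      target: :compound_identity,
      update: {data: Sequel[:excluded][:data], row_updated_at: Sequel::CURRENT_TIMESTAMP},
      # Only perform the update when the incoming payload actually differs.
      update_where: Sequel[:icalendar_event_v1_fixture][:data] !~ Sequel[:excluded][:data],
    ).insert(
      compound_identity: "cal1-uid1",
      data: Sequel.pg_jsonb("UID" => "uid1"),
    )
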
@@ -369,50 +380,15 @@ class Webhookdb::Replicator::IcalendarEventV1 < Webhookdb::Replicator::Base
   # +stale_at+ to +age_cutoff+. This avoids endlessly adding to the icalendar events table
   # due to feeds that change UIDs each fetch- events with changed UIDs will become CANCELLED,
   # and then deleted over time.
-  # @param stale_at [Time] When an event is considered 'stale'.
-  #   If stale events are a big problem, this can be shortened to just a few days.
-  # @param age_cutoff [Time] Where to stop searching for old events.
-  #   This is important to avoid a full table scale when deleting events,
-  #   since otherwise it is like 'row_updated_at < 35.days.ago'.
-  #   Since this routine should run regularly, we should rarely have events more than 35 or 36 days old,
-  #   for example.
-  #   Use +nil+ to use no limit (a full table scan) which may be necessary when running this feature
-  #   for the first time.
-  # @param chunk_size [Integer] The row delete is done in chunks to avoid long locks.
-  #   The default seems safe, but it's exposed as a parameter if you need to play around with it,
-  #   and can be done via configuration if needed at some point.
-  def delete_stale_cancelled_events(
-    stale_at: Webhookdb::Icalendar.stale_cancelled_event_threshold_days.days.ago,
-    age_cutoff: (Webhookdb::Icalendar.stale_cancelled_event_threshold_days + 10).days.ago,
-    chunk_size: 10_000
-  )
-    # Delete in chunks, like:
-    #   DELETE from "public"."icalendar_event_v1_aaaa"
-    #   WHERE pk IN (
-    #     SELECT pk FROM "public"."icalendar_event_v1_aaaa"
-    #     WHERE row_updated_at < (now() - '35 days'::interval)
-    #     LIMIT 10000
-    #   )
-    age = age_cutoff..stale_at
-    self.admin_dataset do |ds|
-      chunk_ds = ds.where(row_updated_at: age, status: "CANCELLED").select(:pk).limit(chunk_size)
-      loop do
-        # Due to conflicts where a feed is being inserted while the delete is happening,
-        # this may raise an error like:
-        #   deadlock detected
-        #   DETAIL: Process 18352 waits for ShareLock on transaction 435085606; blocked by process 24191.
-        #   Process 24191 waits for ShareLock on transaction 435085589; blocked by process 18352.
-        #   HINT: See server log for query details.
-        #   CONTEXT: while deleting tuple (2119119,3) in relation "icalendar_event_v1_aaaa"
-        # Unit testing this is very difficult though, and in practice it is rare,
-        # and normal Sidekiq job retries should be sufficient to handle this.
-        # So we don't explicitly handle deadlocks, but could if it becomes an issue.
-        deleted = ds.where(pk: chunk_ds).delete
-        break if deleted != chunk_size
-      end
-    end
+  class StaleRowDeleter < Webhookdb::Replicator::BaseStaleRowDeleter
+    def stale_at = Webhookdb::Icalendar.stale_cancelled_event_threshold_days.days
+    def lookback_window = Webhookdb::Icalendar.stale_cancelled_event_lookback_days.days
+    def updated_at_column = :row_updated_at
+    def stale_condition = {status: "CANCELLED"}
   end
 
+  def stale_row_deleter = StaleRowDeleter.new(self)
+
   def calculate_webhook_state_machine
     if (step = self.calculate_dependency_state_machine_step(dependency_help: ""))
       return step
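
The chunked-delete loop removed above now lives in the shared base class added in this release (data/lib/webhookdb/replicator/base_stale_row_deleter.rb, not shown in this excerpt), which these one-line overrides configure. A condensed sketch of the pattern the removed method used and the base class generalizes:

    # Delete stale CANCELLED rows in bounded chunks so no single statement
    # holds locks for long. 'ds' is a Sequel dataset for the events table.
    def delete_stale_chunks(ds, stale_at:, age_cutoff:, chunk_size: 10_000)
      chunk = ds.where(row_updated_at: age_cutoff..stale_at, status: "CANCELLED").
        select(:pk).
        limit(chunk_size)
      loop do
        # DELETE FROM tbl WHERE pk IN (SELECT pk FROM tbl WHERE ... LIMIT 10000)
        deleted = ds.where(pk: chunk).delete
        break if deleted < chunk_size
      end
    end
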
data/lib/webhookdb/replicator/icalendar_event_v1_partitioned.rb (new file)
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+require "webhookdb/replicator/icalendar_event_v1"
+require "webhookdb/replicator/partitionable_mixin"
+
+class Webhookdb::Replicator::IcalendarEventV1Partitioned < Webhookdb::Replicator::IcalendarEventV1
+  include Webhookdb::Replicator::PartitionableMixin
+
+  # @return [Webhookdb::Replicator::Descriptor]
+  def self.descriptor
+    return Webhookdb::Replicator::Descriptor.new(
+      name: "icalendar_event_v1_partitioned",
+      ctor: ->(sint) { self.new(sint) },
+      dependency_descriptor: Webhookdb::Replicator::IcalendarCalendarV1.descriptor,
+      feature_roles: ["partitioning_beta"],
+      resource_name_singular: "iCalendar Event",
+      supports_webhooks: true,
+      description: "Individual events in an icalendar, using partitioned tables rather than one big table. " \
+        "See icalendar_calendar_v1.",
+      api_docs_url: "https://icalendar.org/",
+    )
+  end
+
+  def _denormalized_columns
+    d = super
+    d << Webhookdb::Replicator::Column.new(:calendar_external_hash, INTEGER, optional: true)
+    return d
+  end
+
+  def partition_method = Webhookdb::DBAdapter::Partitioning::HASH
+  def partition_column_name = :calendar_external_hash
+  def partition_value(resource) = self._str2inthash(resource.fetch("calendar_external_id"))
+end
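
Hash partitioning needs an integer key, so the mixin derives one from the calendar's external id and stores it in calendar_external_hash. _str2inthash comes from the new PartitionableMixin and is not shown in this diff; below is one plausible way to compute such a value, plus a comment sketching the DDL that hash partitioning implies (both are illustrative assumptions, not the actual implementation):

    require "digest"

    # Illustrative: reduce a string to a stable 32-bit signed integer,
    # suitable for a HASH-partitioned integer column.
    def str2inthash(s)
      Digest::MD5.digest(s)[0, 4].unpack1("l>") # first 4 digest bytes, big-endian signed
    end

    # Postgres-side, the table would be declared roughly like:
    #   CREATE TABLE icalendar_event_v1 (...) PARTITION BY HASH (calendar_external_hash);
    #   CREATE TABLE icalendar_event_v1_p0 PARTITION OF icalendar_event_v1
    #     FOR VALUES WITH (MODULUS 4, REMAINDER 0);
    puts str2inthash("my-calendar-id") # same input always lands in the same partition
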
data/lib/webhookdb/replicator/intercom_contact_v1.rb
@@ -43,6 +43,7 @@ class Webhookdb::Replicator::IntercomContactV1 < Webhookdb::Replicator::Base
   end
 
   def _mixin_backfill_url = "https://api.intercom.io/contacts"
+  def _mixin_backfill_hashkey = "data"
 
   def _resource_and_event(request)
     resource, event = super
data/lib/webhookdb/replicator/intercom_conversation_v1.rb
@@ -40,6 +40,7 @@ class Webhookdb::Replicator::IntercomConversationV1 < Webhookdb::Replicator::Base
   end
 
   def _mixin_backfill_url = "https://api.intercom.io/conversations"
+  def _mixin_backfill_hashkey = "conversations"
 
   def _resource_and_event(request)
     resource, event = super
data/lib/webhookdb/replicator/intercom_v1_mixin.rb
@@ -22,10 +22,9 @@ module Webhookdb::Replicator::IntercomV1Mixin
   # webhook verification, which means that webhooks actually don't require any setup on the integration level. Thus,
   # `supports_webhooks` is false.
   def find_auth_integration
-    # rubocop:disable Naming/MemoizedInstanceVariableName
-    return @auth ||= Webhookdb::Replicator.find_at_root!(self.service_integration,
-      service_name: "intercom_marketplace_root_v1",)
-    # rubocop:enable Naming/MemoizedInstanceVariableName
+    return @find_auth_integration ||= Webhookdb::Replicator.find_at_root!(
+      self.service_integration, service_name: "intercom_marketplace_root_v1",
+    )
   end
 
   def intercom_auth_headers
@@ -74,8 +73,9 @@ module Webhookdb::Replicator::IntercomV1Mixin
   end
 
   def _mixin_backfill_url = raise NotImplementedError
+  def _mixin_backfill_hashkey = raise NotImplementedError
 
-  def _fetch_backfill_page(pagination_token, **_kwargs)
+  def _fetch_backfill_page(pagination_token, last_backfilled:)
     unless self.auth_credentials?
       raise Webhookdb::Replicator::CredentialsMissing,
         "This integration requires that the Intercom Auth integration has a valid Auth Token"
@@ -93,6 +93,28 @@ module Webhookdb::Replicator::IntercomV1Mixin
       timeout: Webhookdb::Intercom.http_timeout,
     )
   rescue Webhookdb::Http::Error => e
+    is_token_suspended = e.status == 401 &&
+      e.response["errors"].present? &&
+      e.response["errors"].any? { |er| er["code"] == "token_suspended" }
+    if is_token_suspended
+      root_sint = self.find_auth_integration
+      message = "Organization has closed their Intercom workspace and this integration should be deleted. " \
+        "From a console, run: " \
+        "Webhookdb::ServiceIntegration[#{root_sint.id}].destroy_self_and_all_dependents"
+      Webhookdb::DeveloperAlert.new(
+        subsystem: "Intercom Workspace Closed Error",
+        emoji: ":hook:",
+        fallback: message,
+        fields: [
+          {title: "Organization", value: root_sint.organization.name, short: true},
+          {title: "Integration ID", value: root_sint.id.to_s, short: true},
+          {title: "Instructions", value: message},
+        ],
+      ).emit
+      # Noop here since there's nothing to do, the developer alert takes care of notifying
+      # so no need to error or log.
+      return [], nil
+    end
     # We are looking to catch the "api plan restricted" error. This is always a 403 and every
     # 403 will be an "api plan restricted" error according to the API documentation. Because we
     # specify the API version in our headers we can expect that this won't change.
@@ -102,8 +124,29 @@ module Webhookdb::Replicator::IntercomV1Mixin
       # a TypeError in the backfiller.
       return [], nil
     end
-    data = response.parsed_response.fetch("data", [])
+    data = response.parsed_response.fetch(self._mixin_backfill_hashkey)
     starting_after = response.parsed_response.dig("pages", "next", "starting_after")
+    # Intercom pagination sorts by updated_at newest. So if we are doing an incremental sync (last_backfilled set),
+    # and we last backfilled after the latest updated_at, we can stop paginating.
+    if last_backfilled && data.last && data.last["updated_at"]
+      oldest_update = Time.at(data.last["updated_at"])
+      starting_after = nil if oldest_update < last_backfilled
+    end
     return data, starting_after
   end
+
+  def _backfillers
+    return [Backfiller.new(self)]
+  end
+
+  class Backfiller < Webhookdb::Replicator::Base::ServiceBackfiller
+    include Webhookdb::Backfiller::Bulk
+
+    # Upsert for each API call
+    def upsert_page_size = Webhookdb::Intercom.page_size
+    def prepare_body(_body) = nil
+    def upserting_replicator = self.svc
+    # We don't want to override newer items from webhooks, so use conditional upsert.
+    def conditional_upsert? = true
+  end
 end
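
Since Intercom lists results newest-updated first, the cutoff above lets an incremental backfill stop requesting pages as soon as a page's oldest item predates the last backfill. A toy run of that rule against a fabricated page payload:

    require "time"

    last_backfilled = Time.parse("2024-03-01T00:00:00Z")
    page = {
      "data" => [
        {"id" => "c1", "updated_at" => 1_709_600_000}, # ~2024-03-05
        {"id" => "c2", "updated_at" => 1_708_000_000}, # ~2024-02-15, oldest on the page
      ],
      "pages" => {"next" => {"starting_after" => "opaque-cursor"}},
    }
    starting_after = page.dig("pages", "next", "starting_after")
    oldest_update = Time.at(page["data"].last["updated_at"])
    # Everything older than this page was covered by the previous backfill run.
    starting_after = nil if oldest_update < last_backfilled
    p starting_after # => nil, so pagination stops here
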