webhookdb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (364)
  1. checksums.yaml +7 -0
  2. data/data/messages/layouts/blank.email.liquid +10 -0
  3. data/data/messages/layouts/minimal.email.liquid +28 -0
  4. data/data/messages/layouts/standard.email.liquid +28 -0
  5. data/data/messages/partials/button.liquid +15 -0
  6. data/data/messages/partials/environment_banner.liquid +9 -0
  7. data/data/messages/partials/footer.liquid +22 -0
  8. data/data/messages/partials/greeting.liquid +3 -0
  9. data/data/messages/partials/logo_header.liquid +18 -0
  10. data/data/messages/partials/signoff.liquid +1 -0
  11. data/data/messages/styles/v1.liquid +346 -0
  12. data/data/messages/templates/errors/icalendar_fetch.email.liquid +29 -0
  13. data/data/messages/templates/invite.email.liquid +15 -0
  14. data/data/messages/templates/new_customer.email.liquid +24 -0
  15. data/data/messages/templates/org_database_migration_finished.email.liquid +7 -0
  16. data/data/messages/templates/org_database_migration_started.email.liquid +9 -0
  17. data/data/messages/templates/specs/_field_partial.liquid +1 -0
  18. data/data/messages/templates/specs/basic.email.liquid +2 -0
  19. data/data/messages/templates/specs/basic.fake.liquid +1 -0
  20. data/data/messages/templates/specs/with_field.email.liquid +2 -0
  21. data/data/messages/templates/specs/with_field.fake.liquid +1 -0
  22. data/data/messages/templates/specs/with_include.email.liquid +2 -0
  23. data/data/messages/templates/specs/with_partial.email.liquid +1 -0
  24. data/data/messages/templates/verification.email.liquid +14 -0
  25. data/data/messages/templates/verification.sms.liquid +1 -0
  26. data/data/messages/web/install-customer-login.liquid +48 -0
  27. data/data/messages/web/install-error.liquid +17 -0
  28. data/data/messages/web/install-success.liquid +35 -0
  29. data/data/messages/web/install.liquid +20 -0
  30. data/data/messages/web/partials/footer.liquid +4 -0
  31. data/data/messages/web/partials/form_error.liquid +1 -0
  32. data/data/messages/web/partials/header.liquid +3 -0
  33. data/data/messages/web/styles.liquid +134 -0
  34. data/data/windows_tz.txt +461 -0
  35. data/db/migrations/001_testing_pixies.rb +13 -0
  36. data/db/migrations/002_initial.rb +132 -0
  37. data/db/migrations/003_ux_overhaul.rb +20 -0
  38. data/db/migrations/004_incremental_backfill.rb +9 -0
  39. data/db/migrations/005_log_webhooks.rb +24 -0
  40. data/db/migrations/006_generalize_roles.rb +29 -0
  41. data/db/migrations/007_org_dns.rb +12 -0
  42. data/db/migrations/008_webhook_subscriptions.rb +19 -0
  43. data/db/migrations/009_nonunique_stripe_subscription_customer.rb +16 -0
  44. data/db/migrations/010_drop_integration_soft_delete.rb +14 -0
  45. data/db/migrations/011_webhook_subscriptions_created_at.rb +10 -0
  46. data/db/migrations/012_webhook_subscriptions_created_by.rb +9 -0
  47. data/db/migrations/013_default_org_membership.rb +30 -0
  48. data/db/migrations/014_webhook_subscription_deliveries.rb +26 -0
  49. data/db/migrations/015_dependent_integrations.rb +9 -0
  50. data/db/migrations/016_encrypted_columns.rb +9 -0
  51. data/db/migrations/017_skip_verification.rb +9 -0
  52. data/db/migrations/018_sync_targets.rb +25 -0
  53. data/db/migrations/019_org_schema.rb +9 -0
  54. data/db/migrations/020_org_database_migrations.rb +25 -0
  55. data/db/migrations/021_no_default_org_schema.rb +14 -0
  56. data/db/migrations/022_database_document.rb +15 -0
  57. data/db/migrations/023_sync_target_schema.rb +9 -0
  58. data/db/migrations/024_org_semaphore_jobs.rb +9 -0
  59. data/db/migrations/025_integration_backfill_cursor.rb +9 -0
  60. data/db/migrations/026_undo_integration_backfill_cursor.rb +9 -0
  61. data/db/migrations/027_sync_target_http_sync.rb +12 -0
  62. data/db/migrations/028_logged_webhook_path.rb +24 -0
  63. data/db/migrations/029_encrypt_columns.rb +97 -0
  64. data/db/migrations/030_org_sync_target_timeout.rb +9 -0
  65. data/db/migrations/031_org_max_query_rows.rb +9 -0
  66. data/db/migrations/032_remove_db_defaults.rb +12 -0
  67. data/db/migrations/033_backfill_jobs.rb +26 -0
  68. data/db/migrations/034_backfill_job_criteria.rb +9 -0
  69. data/db/migrations/035_synchronous_backfill.rb +9 -0
  70. data/db/migrations/036_oauth.rb +26 -0
  71. data/db/migrations/037_oauth_used.rb +9 -0
  72. data/lib/amigo/durable_job.rb +416 -0
  73. data/lib/pry/clipboard.rb +111 -0
  74. data/lib/sequel/advisory_lock.rb +65 -0
  75. data/lib/webhookdb/admin.rb +4 -0
  76. data/lib/webhookdb/admin_api/auth.rb +36 -0
  77. data/lib/webhookdb/admin_api/customers.rb +63 -0
  78. data/lib/webhookdb/admin_api/database_documents.rb +20 -0
  79. data/lib/webhookdb/admin_api/entities.rb +66 -0
  80. data/lib/webhookdb/admin_api/message_deliveries.rb +61 -0
  81. data/lib/webhookdb/admin_api/roles.rb +15 -0
  82. data/lib/webhookdb/admin_api.rb +34 -0
  83. data/lib/webhookdb/aggregate_result.rb +63 -0
  84. data/lib/webhookdb/api/auth.rb +122 -0
  85. data/lib/webhookdb/api/connstr_auth.rb +36 -0
  86. data/lib/webhookdb/api/db.rb +188 -0
  87. data/lib/webhookdb/api/demo.rb +14 -0
  88. data/lib/webhookdb/api/entities.rb +198 -0
  89. data/lib/webhookdb/api/helpers.rb +253 -0
  90. data/lib/webhookdb/api/install.rb +296 -0
  91. data/lib/webhookdb/api/me.rb +53 -0
  92. data/lib/webhookdb/api/organizations.rb +254 -0
  93. data/lib/webhookdb/api/replay.rb +64 -0
  94. data/lib/webhookdb/api/service_integrations.rb +402 -0
  95. data/lib/webhookdb/api/services.rb +27 -0
  96. data/lib/webhookdb/api/stripe.rb +22 -0
  97. data/lib/webhookdb/api/subscriptions.rb +67 -0
  98. data/lib/webhookdb/api/sync_targets.rb +232 -0
  99. data/lib/webhookdb/api/system.rb +37 -0
  100. data/lib/webhookdb/api/webhook_subscriptions.rb +96 -0
  101. data/lib/webhookdb/api.rb +92 -0
  102. data/lib/webhookdb/apps.rb +93 -0
  103. data/lib/webhookdb/async/audit_logger.rb +38 -0
  104. data/lib/webhookdb/async/autoscaler.rb +84 -0
  105. data/lib/webhookdb/async/job.rb +18 -0
  106. data/lib/webhookdb/async/job_logger.rb +45 -0
  107. data/lib/webhookdb/async/scheduled_job.rb +18 -0
  108. data/lib/webhookdb/async.rb +142 -0
  109. data/lib/webhookdb/aws.rb +98 -0
  110. data/lib/webhookdb/backfill_job.rb +107 -0
  111. data/lib/webhookdb/backfiller.rb +107 -0
  112. data/lib/webhookdb/cloudflare.rb +39 -0
  113. data/lib/webhookdb/connection_cache.rb +177 -0
  114. data/lib/webhookdb/console.rb +71 -0
  115. data/lib/webhookdb/convertkit.rb +14 -0
  116. data/lib/webhookdb/crypto.rb +66 -0
  117. data/lib/webhookdb/customer/reset_code.rb +94 -0
  118. data/lib/webhookdb/customer.rb +347 -0
  119. data/lib/webhookdb/database_document.rb +72 -0
  120. data/lib/webhookdb/db_adapter/column_types.rb +37 -0
  121. data/lib/webhookdb/db_adapter/default_sql.rb +187 -0
  122. data/lib/webhookdb/db_adapter/pg.rb +96 -0
  123. data/lib/webhookdb/db_adapter/snowflake.rb +137 -0
  124. data/lib/webhookdb/db_adapter.rb +208 -0
  125. data/lib/webhookdb/dbutil.rb +92 -0
  126. data/lib/webhookdb/demo_mode.rb +100 -0
  127. data/lib/webhookdb/developer_alert.rb +51 -0
  128. data/lib/webhookdb/email_octopus.rb +21 -0
  129. data/lib/webhookdb/enumerable.rb +18 -0
  130. data/lib/webhookdb/fixtures/backfill_jobs.rb +72 -0
  131. data/lib/webhookdb/fixtures/customers.rb +65 -0
  132. data/lib/webhookdb/fixtures/database_documents.rb +27 -0
  133. data/lib/webhookdb/fixtures/faker.rb +41 -0
  134. data/lib/webhookdb/fixtures/logged_webhooks.rb +56 -0
  135. data/lib/webhookdb/fixtures/message_deliveries.rb +59 -0
  136. data/lib/webhookdb/fixtures/oauth_sessions.rb +24 -0
  137. data/lib/webhookdb/fixtures/organization_database_migrations.rb +37 -0
  138. data/lib/webhookdb/fixtures/organization_memberships.rb +54 -0
  139. data/lib/webhookdb/fixtures/organizations.rb +32 -0
  140. data/lib/webhookdb/fixtures/reset_codes.rb +23 -0
  141. data/lib/webhookdb/fixtures/service_integrations.rb +42 -0
  142. data/lib/webhookdb/fixtures/subscriptions.rb +33 -0
  143. data/lib/webhookdb/fixtures/sync_targets.rb +32 -0
  144. data/lib/webhookdb/fixtures/webhook_subscriptions.rb +35 -0
  145. data/lib/webhookdb/fixtures.rb +15 -0
  146. data/lib/webhookdb/formatting.rb +56 -0
  147. data/lib/webhookdb/front.rb +49 -0
  148. data/lib/webhookdb/github.rb +22 -0
  149. data/lib/webhookdb/google_calendar.rb +29 -0
  150. data/lib/webhookdb/heroku.rb +21 -0
  151. data/lib/webhookdb/http.rb +114 -0
  152. data/lib/webhookdb/icalendar.rb +17 -0
  153. data/lib/webhookdb/id.rb +17 -0
  154. data/lib/webhookdb/idempotency.rb +90 -0
  155. data/lib/webhookdb/increase.rb +42 -0
  156. data/lib/webhookdb/intercom.rb +23 -0
  157. data/lib/webhookdb/jobs/amigo_test_jobs.rb +118 -0
  158. data/lib/webhookdb/jobs/backfill.rb +32 -0
  159. data/lib/webhookdb/jobs/create_mirror_table.rb +18 -0
  160. data/lib/webhookdb/jobs/create_stripe_customer.rb +17 -0
  161. data/lib/webhookdb/jobs/customer_created_notify_internal.rb +22 -0
  162. data/lib/webhookdb/jobs/demo_mode_sync_data.rb +19 -0
  163. data/lib/webhookdb/jobs/deprecated_jobs.rb +19 -0
  164. data/lib/webhookdb/jobs/developer_alert_handle.rb +14 -0
  165. data/lib/webhookdb/jobs/durable_job_recheck_poller.rb +17 -0
  166. data/lib/webhookdb/jobs/emailer.rb +15 -0
  167. data/lib/webhookdb/jobs/icalendar_enqueue_syncs.rb +25 -0
  168. data/lib/webhookdb/jobs/icalendar_sync.rb +23 -0
  169. data/lib/webhookdb/jobs/logged_webhook_replay.rb +17 -0
  170. data/lib/webhookdb/jobs/logged_webhook_resilient_replay.rb +15 -0
  171. data/lib/webhookdb/jobs/message_dispatched.rb +16 -0
  172. data/lib/webhookdb/jobs/organization_database_migration_notify_finished.rb +21 -0
  173. data/lib/webhookdb/jobs/organization_database_migration_notify_started.rb +21 -0
  174. data/lib/webhookdb/jobs/organization_database_migration_run.rb +24 -0
  175. data/lib/webhookdb/jobs/prepare_database_connections.rb +22 -0
  176. data/lib/webhookdb/jobs/process_webhook.rb +47 -0
  177. data/lib/webhookdb/jobs/renew_watch_channel.rb +24 -0
  178. data/lib/webhookdb/jobs/replication_migration.rb +24 -0
  179. data/lib/webhookdb/jobs/reset_code_create_dispatch.rb +23 -0
  180. data/lib/webhookdb/jobs/scheduled_backfills.rb +77 -0
  181. data/lib/webhookdb/jobs/send_invite.rb +15 -0
  182. data/lib/webhookdb/jobs/send_test_webhook.rb +25 -0
  183. data/lib/webhookdb/jobs/send_webhook.rb +20 -0
  184. data/lib/webhookdb/jobs/sync_target_enqueue_scheduled.rb +16 -0
  185. data/lib/webhookdb/jobs/sync_target_run_sync.rb +38 -0
  186. data/lib/webhookdb/jobs/trim_logged_webhooks.rb +15 -0
  187. data/lib/webhookdb/jobs/webhook_resource_notify_integrations.rb +30 -0
  188. data/lib/webhookdb/jobs/webhook_subscription_delivery_attempt.rb +29 -0
  189. data/lib/webhookdb/jobs.rb +4 -0
  190. data/lib/webhookdb/json.rb +113 -0
  191. data/lib/webhookdb/liquid/expose.rb +27 -0
  192. data/lib/webhookdb/liquid/filters.rb +16 -0
  193. data/lib/webhookdb/liquid/liquification.rb +26 -0
  194. data/lib/webhookdb/liquid/partial.rb +12 -0
  195. data/lib/webhookdb/logged_webhook/resilient.rb +95 -0
  196. data/lib/webhookdb/logged_webhook.rb +194 -0
  197. data/lib/webhookdb/message/body.rb +25 -0
  198. data/lib/webhookdb/message/delivery.rb +127 -0
  199. data/lib/webhookdb/message/email_transport.rb +133 -0
  200. data/lib/webhookdb/message/fake_transport.rb +54 -0
  201. data/lib/webhookdb/message/liquid_drops.rb +29 -0
  202. data/lib/webhookdb/message/template.rb +89 -0
  203. data/lib/webhookdb/message/transport.rb +43 -0
  204. data/lib/webhookdb/message.rb +150 -0
  205. data/lib/webhookdb/messages/error_icalendar_fetch.rb +42 -0
  206. data/lib/webhookdb/messages/invite.rb +23 -0
  207. data/lib/webhookdb/messages/new_customer.rb +14 -0
  208. data/lib/webhookdb/messages/org_database_migration_finished.rb +23 -0
  209. data/lib/webhookdb/messages/org_database_migration_started.rb +24 -0
  210. data/lib/webhookdb/messages/specs.rb +57 -0
  211. data/lib/webhookdb/messages/verification.rb +23 -0
  212. data/lib/webhookdb/method_utilities.rb +82 -0
  213. data/lib/webhookdb/microsoft_calendar.rb +36 -0
  214. data/lib/webhookdb/nextpax.rb +14 -0
  215. data/lib/webhookdb/oauth/front.rb +58 -0
  216. data/lib/webhookdb/oauth/intercom.rb +58 -0
  217. data/lib/webhookdb/oauth/session.rb +24 -0
  218. data/lib/webhookdb/oauth.rb +80 -0
  219. data/lib/webhookdb/organization/alerting.rb +35 -0
  220. data/lib/webhookdb/organization/database_migration.rb +151 -0
  221. data/lib/webhookdb/organization/db_builder.rb +429 -0
  222. data/lib/webhookdb/organization.rb +506 -0
  223. data/lib/webhookdb/organization_membership.rb +58 -0
  224. data/lib/webhookdb/phone_number.rb +38 -0
  225. data/lib/webhookdb/plaid.rb +23 -0
  226. data/lib/webhookdb/platform.rb +27 -0
  227. data/lib/webhookdb/plivo.rb +52 -0
  228. data/lib/webhookdb/postgres/maintenance.rb +166 -0
  229. data/lib/webhookdb/postgres/model.rb +82 -0
  230. data/lib/webhookdb/postgres/model_utilities.rb +382 -0
  231. data/lib/webhookdb/postgres/testing_pixie.rb +16 -0
  232. data/lib/webhookdb/postgres/validations.rb +46 -0
  233. data/lib/webhookdb/postgres.rb +176 -0
  234. data/lib/webhookdb/postmark.rb +20 -0
  235. data/lib/webhookdb/redis.rb +35 -0
  236. data/lib/webhookdb/replicator/atom_single_feed_v1.rb +116 -0
  237. data/lib/webhookdb/replicator/aws_pricing_v1.rb +488 -0
  238. data/lib/webhookdb/replicator/base.rb +1185 -0
  239. data/lib/webhookdb/replicator/column.rb +482 -0
  240. data/lib/webhookdb/replicator/convertkit_broadcast_v1.rb +69 -0
  241. data/lib/webhookdb/replicator/convertkit_subscriber_v1.rb +200 -0
  242. data/lib/webhookdb/replicator/convertkit_tag_v1.rb +66 -0
  243. data/lib/webhookdb/replicator/convertkit_v1_mixin.rb +65 -0
  244. data/lib/webhookdb/replicator/docgen.rb +167 -0
  245. data/lib/webhookdb/replicator/email_octopus_campaign_v1.rb +84 -0
  246. data/lib/webhookdb/replicator/email_octopus_contact_v1.rb +159 -0
  247. data/lib/webhookdb/replicator/email_octopus_event_v1.rb +244 -0
  248. data/lib/webhookdb/replicator/email_octopus_list_v1.rb +101 -0
  249. data/lib/webhookdb/replicator/fake.rb +453 -0
  250. data/lib/webhookdb/replicator/front_conversation_v1.rb +45 -0
  251. data/lib/webhookdb/replicator/front_marketplace_root_v1.rb +55 -0
  252. data/lib/webhookdb/replicator/front_message_v1.rb +45 -0
  253. data/lib/webhookdb/replicator/front_v1_mixin.rb +22 -0
  254. data/lib/webhookdb/replicator/github_issue_comment_v1.rb +58 -0
  255. data/lib/webhookdb/replicator/github_issue_v1.rb +83 -0
  256. data/lib/webhookdb/replicator/github_pull_v1.rb +84 -0
  257. data/lib/webhookdb/replicator/github_release_v1.rb +47 -0
  258. data/lib/webhookdb/replicator/github_repo_v1_mixin.rb +250 -0
  259. data/lib/webhookdb/replicator/github_repository_event_v1.rb +45 -0
  260. data/lib/webhookdb/replicator/icalendar_calendar_v1.rb +465 -0
  261. data/lib/webhookdb/replicator/icalendar_event_v1.rb +334 -0
  262. data/lib/webhookdb/replicator/increase_account_number_v1.rb +77 -0
  263. data/lib/webhookdb/replicator/increase_account_transfer_v1.rb +61 -0
  264. data/lib/webhookdb/replicator/increase_account_v1.rb +63 -0
  265. data/lib/webhookdb/replicator/increase_ach_transfer_v1.rb +78 -0
  266. data/lib/webhookdb/replicator/increase_check_transfer_v1.rb +64 -0
  267. data/lib/webhookdb/replicator/increase_limit_v1.rb +78 -0
  268. data/lib/webhookdb/replicator/increase_transaction_v1.rb +74 -0
  269. data/lib/webhookdb/replicator/increase_v1_mixin.rb +121 -0
  270. data/lib/webhookdb/replicator/increase_wire_transfer_v1.rb +61 -0
  271. data/lib/webhookdb/replicator/intercom_contact_v1.rb +36 -0
  272. data/lib/webhookdb/replicator/intercom_conversation_v1.rb +38 -0
  273. data/lib/webhookdb/replicator/intercom_marketplace_root_v1.rb +69 -0
  274. data/lib/webhookdb/replicator/intercom_v1_mixin.rb +105 -0
  275. data/lib/webhookdb/replicator/oauth_refresh_access_token_mixin.rb +65 -0
  276. data/lib/webhookdb/replicator/plivo_sms_inbound_v1.rb +102 -0
  277. data/lib/webhookdb/replicator/postmark_inbound_message_v1.rb +94 -0
  278. data/lib/webhookdb/replicator/postmark_outbound_message_event_v1.rb +107 -0
  279. data/lib/webhookdb/replicator/schema_modification.rb +42 -0
  280. data/lib/webhookdb/replicator/shopify_customer_v1.rb +58 -0
  281. data/lib/webhookdb/replicator/shopify_order_v1.rb +64 -0
  282. data/lib/webhookdb/replicator/shopify_v1_mixin.rb +161 -0
  283. data/lib/webhookdb/replicator/signalwire_message_v1.rb +169 -0
  284. data/lib/webhookdb/replicator/sponsy_customer_v1.rb +54 -0
  285. data/lib/webhookdb/replicator/sponsy_placement_v1.rb +34 -0
  286. data/lib/webhookdb/replicator/sponsy_publication_v1.rb +125 -0
  287. data/lib/webhookdb/replicator/sponsy_slot_v1.rb +41 -0
  288. data/lib/webhookdb/replicator/sponsy_status_v1.rb +35 -0
  289. data/lib/webhookdb/replicator/sponsy_v1_mixin.rb +165 -0
  290. data/lib/webhookdb/replicator/state_machine_step.rb +69 -0
  291. data/lib/webhookdb/replicator/stripe_charge_v1.rb +77 -0
  292. data/lib/webhookdb/replicator/stripe_coupon_v1.rb +62 -0
  293. data/lib/webhookdb/replicator/stripe_customer_v1.rb +60 -0
  294. data/lib/webhookdb/replicator/stripe_dispute_v1.rb +77 -0
  295. data/lib/webhookdb/replicator/stripe_invoice_item_v1.rb +82 -0
  296. data/lib/webhookdb/replicator/stripe_invoice_v1.rb +116 -0
  297. data/lib/webhookdb/replicator/stripe_payout_v1.rb +67 -0
  298. data/lib/webhookdb/replicator/stripe_price_v1.rb +60 -0
  299. data/lib/webhookdb/replicator/stripe_product_v1.rb +60 -0
  300. data/lib/webhookdb/replicator/stripe_refund_v1.rb +101 -0
  301. data/lib/webhookdb/replicator/stripe_subscription_item_v1.rb +56 -0
  302. data/lib/webhookdb/replicator/stripe_subscription_v1.rb +75 -0
  303. data/lib/webhookdb/replicator/stripe_v1_mixin.rb +116 -0
  304. data/lib/webhookdb/replicator/transistor_episode_stats_v1.rb +141 -0
  305. data/lib/webhookdb/replicator/transistor_episode_v1.rb +169 -0
  306. data/lib/webhookdb/replicator/transistor_show_v1.rb +68 -0
  307. data/lib/webhookdb/replicator/transistor_v1_mixin.rb +65 -0
  308. data/lib/webhookdb/replicator/twilio_sms_v1.rb +156 -0
  309. data/lib/webhookdb/replicator/webhook_request.rb +5 -0
  310. data/lib/webhookdb/replicator/webhookdb_customer_v1.rb +74 -0
  311. data/lib/webhookdb/replicator.rb +224 -0
  312. data/lib/webhookdb/role.rb +42 -0
  313. data/lib/webhookdb/sentry.rb +35 -0
  314. data/lib/webhookdb/service/auth.rb +138 -0
  315. data/lib/webhookdb/service/collection.rb +91 -0
  316. data/lib/webhookdb/service/entities.rb +97 -0
  317. data/lib/webhookdb/service/helpers.rb +270 -0
  318. data/lib/webhookdb/service/middleware.rb +124 -0
  319. data/lib/webhookdb/service/types.rb +30 -0
  320. data/lib/webhookdb/service/validators.rb +32 -0
  321. data/lib/webhookdb/service/view_api.rb +63 -0
  322. data/lib/webhookdb/service.rb +219 -0
  323. data/lib/webhookdb/service_integration.rb +332 -0
  324. data/lib/webhookdb/shopify.rb +35 -0
  325. data/lib/webhookdb/signalwire.rb +13 -0
  326. data/lib/webhookdb/slack.rb +68 -0
  327. data/lib/webhookdb/snowflake.rb +90 -0
  328. data/lib/webhookdb/spec_helpers/async.rb +122 -0
  329. data/lib/webhookdb/spec_helpers/citest.rb +88 -0
  330. data/lib/webhookdb/spec_helpers/integration.rb +121 -0
  331. data/lib/webhookdb/spec_helpers/message.rb +41 -0
  332. data/lib/webhookdb/spec_helpers/postgres.rb +220 -0
  333. data/lib/webhookdb/spec_helpers/service.rb +432 -0
  334. data/lib/webhookdb/spec_helpers/shared_examples_for_columns.rb +56 -0
  335. data/lib/webhookdb/spec_helpers/shared_examples_for_replicators.rb +915 -0
  336. data/lib/webhookdb/spec_helpers/whdb.rb +139 -0
  337. data/lib/webhookdb/spec_helpers.rb +63 -0
  338. data/lib/webhookdb/sponsy.rb +14 -0
  339. data/lib/webhookdb/stripe.rb +37 -0
  340. data/lib/webhookdb/subscription.rb +203 -0
  341. data/lib/webhookdb/sync_target.rb +491 -0
  342. data/lib/webhookdb/tasks/admin.rb +49 -0
  343. data/lib/webhookdb/tasks/annotate.rb +36 -0
  344. data/lib/webhookdb/tasks/db.rb +82 -0
  345. data/lib/webhookdb/tasks/docs.rb +42 -0
  346. data/lib/webhookdb/tasks/fixture.rb +35 -0
  347. data/lib/webhookdb/tasks/message.rb +50 -0
  348. data/lib/webhookdb/tasks/regress.rb +87 -0
  349. data/lib/webhookdb/tasks/release.rb +27 -0
  350. data/lib/webhookdb/tasks/sidekiq.rb +23 -0
  351. data/lib/webhookdb/tasks/specs.rb +64 -0
  352. data/lib/webhookdb/theranest.rb +15 -0
  353. data/lib/webhookdb/transistor.rb +13 -0
  354. data/lib/webhookdb/twilio.rb +13 -0
  355. data/lib/webhookdb/typed_struct.rb +44 -0
  356. data/lib/webhookdb/version.rb +5 -0
  357. data/lib/webhookdb/webhook_response.rb +50 -0
  358. data/lib/webhookdb/webhook_subscription/delivery.rb +82 -0
  359. data/lib/webhookdb/webhook_subscription.rb +226 -0
  360. data/lib/webhookdb/windows_tz.rb +32 -0
  361. data/lib/webhookdb/xml.rb +92 -0
  362. data/lib/webhookdb.rb +224 -0
  363. data/lib/webterm/apps.rb +45 -0
  364. metadata +1129 -0
@@ -0,0 +1,1185 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "appydays/loggable"
4
+ require "concurrent-ruby"
5
+
6
+ require "webhookdb/backfiller"
7
+ require "webhookdb/db_adapter"
8
+ require "webhookdb/connection_cache"
9
+ require "webhookdb/replicator/column"
10
+ require "webhookdb/replicator/schema_modification"
11
+ require "webhookdb/replicator/webhook_request"
12
+ require "webhookdb/typed_struct"
13
+
14
+ require "webhookdb/jobs/send_webhook"
15
+ require "webhookdb/jobs/sync_target_run_sync"
16
+
17
+ class Webhookdb::Replicator::Base
18
+ include Appydays::Loggable
19
+ include Webhookdb::DBAdapter::ColumnTypes
20
+
21
# Return the descriptor for this service.
# Subclasses must override this; the descriptor is used for registration
# and describes the replicator's capabilities.
# @abstract
# @return [Webhookdb::Replicator::Descriptor]
def self.descriptor
  # This is a class (singleton) method, so +self+ is already the class.
  # The previous "#{self.class}" interpolated "Class" rather than the
  # replicator subclass name, making the error message useless.
  raise NotImplementedError, "#{self}: must return a descriptor that is used for registration purposes"
end
27
+
28
# The service integration this replicator instance operates on.
# @return [Webhookdb::ServiceIntegration]
attr_reader :service_integration
30
+
31
# @param service_integration [Webhookdb::ServiceIntegration] the integration
#   whose configuration and replication table this replicator works against.
def initialize(service_integration)
  @service_integration = service_integration
end
34
+
35
# Memoized instance-level access to the class descriptor.
# @return [Webhookdb::Replicator::Descriptor]
def descriptor
  @descriptor ||= self.class.descriptor
  return @descriptor
end
39
+
40
# Singular human-readable resource name, memoized from the descriptor.
def resource_name_singular
  @resource_name_singular ||= self.descriptor.resource_name_singular
  return @resource_name_singular
end
43
+
44
# Plural human-readable resource name, memoized from the descriptor.
def resource_name_plural
  @resource_name_plural ||= self.descriptor.resource_name_plural
  return @resource_name_plural
end
47
+
48
# True when webhooks must be handled inline in the endpoint itself,
# rather than asynchronously through the job system.
# Only use this where order-of-operations in webhook processing matters
# and/or the webhook sender needs data returned to it.
#
# NOTE: Implement +synchronous_processing_response_body+ whenever this returns true.
#
# @return [Boolean]
def process_webhooks_synchronously?
  false
end
59
+
60
# Build the response body for a synchronously-processed webhook.
# Receives the row values that were upserted (the result of upsert_webhook)
# and the request that produced them (the argument to upsert_webhook);
# returns the body string to respond with.
#
# @param upserted [Hash] the upserted row values.
# @param request [Webhookdb::Replicator::WebhookRequest] the originating request.
# @return [String]
def synchronous_processing_response_body(upserted:, request:)
  if Webhookdb::Replicator.always_process_synchronously
    return {message: "process synchronously"}.to_json
  end
  raise NotImplementedError, "must be implemented if process_webhooks_synchronously? is true"
end
72
+
73
+ # In some cases, services may send us sensitive headers we do not want to log.
74
+ # This should be very rare but some services are designed really badly and send auth info in the webhook.
75
+ # Remove or obfuscate the passed header hash.
76
+ def preprocess_headers_for_logging(headers); end
77
+
78
# Return a [schema, table] symbol pair: the organization's replication schema
# (defaulting to :public when absent) and the integration's table name.
#
# @return [Array<Symbol>]
def schema_and_table_symbols
  org = self.service_integration.organization
  schema_sym = org&.replication_schema&.to_sym || :public
  table_sym = self.service_integration.table_name.to_sym
  return [schema_sym, table_sym]
end
87
+
88
# Build a schema-qualified Sequel identifier for the replication table.
# Defaults come from +schema_and_table_symbols+; pass +schema+ and/or +table+
# to override either part.
#
# @return [Sequel::SQL::QualifiedIdentifier]
def qualified_table_sequel_identifier(schema: nil, table: nil)
  default_schema, default_table = self.schema_and_table_symbols
  return Sequel[schema || default_schema][table || default_table]
end
96
+
97
# Build a DBAdapter table description from +schema_and_table_symbols+.
# @return [Webhookdb::DBAdapter::Table]
def dbadapter_table
  schema_name, table_name = self.schema_and_table_symbols
  return Webhookdb::DBAdapter::Table.new(
    name: table_name,
    schema: Webhookdb::DBAdapter::Schema.new(name: schema_name),
  )
end
105
+
106
# +Time.at(t)+, but passes nil through untouched.
# Use when dealing with 'nullable' integer timestamps.
# @param t [Integer, nil]
# @return [Time, nil]
protected def tsat(t)
  return t.nil? ? nil : Time.at(t)
end
113
+
114
# Given a Rack request, return the webhook response object.
# Normally delegates to +_webhook_response+, which verifies the request
# against the webhook secret configured on the service integration.
# When +skip_webhook_verification+ is set on the integration,
# verification is bypassed and a 201 is always returned.
#
# @param request [Rack::Request]
# @return [Webhookdb::WebhookResponse]
def webhook_response(request)
  if self.service_integration.skip_webhook_verification
    return Webhookdb::WebhookResponse.ok(status: 201)
  end
  return self._webhook_response(request)
end
126
+
127
# Return the response for the webhook.
# We must do this immediately in the endpoint itself,
# since verification may include info specific to the request content
# (like, it can be whitespace sensitive).
# Subclasses implement the service-specific verification here.
# @abstract
# @param [Rack::Request] request
# @return [Webhookdb::WebhookResponse]
def _webhook_response(request)
  raise NotImplementedError
end
137
+
138
# Fields on the service integration that drive the webhook state machine.
# Override if webhook setup needs additional fields.
def _webhook_state_change_fields
  return ["webhook_secret"]
end
141
+
142
# Fields on the service integration that drive the backfill state machine.
# Override if other fields are used instead.
# Must not overlap with +_webhook_state_change_fields+.
def _backfill_state_change_fields
  return ["backfill_key", "backfill_secret", "api_url"]
end
146
+
147
# Set the new service integration field and
# return the newly calculated state machine.
#
# Subclasses can override this method and then super,
# to change the field or value.
#
# The field determines which state machine is recalculated:
# webhook fields map to the webhook machine, backfill fields to the
# backfill machine (each falling back to the other when unsupported),
# "dependency_choice" resolves and stores an upstream dependency,
# and "noop_create" recalculates without modifying any data.
#
# @param field [String] Like 'webhook_secret', 'backfill_key', etc.
# @param value [String] The value of the field.
# @param attr [String] Subclasses can pass in a custom field that does not correspond
#   to a service integration column. When doing that, they must pass in attr,
#   which is what will be set during the state change.
# @return [Webhookdb::Replicator::StateMachineStep]
# @raise [ArgumentError] when the field is not recognized.
def process_state_change(field, value, attr: nil)
  attr ||= field
  desc = self.descriptor
  case field
  when *self._webhook_state_change_fields
    # If we don't support webhooks, then the backfill state machine may be using it.
    meth = desc.supports_webhooks? ? :calculate_webhook_state_machine : :calculate_backfill_state_machine
  when *self._backfill_state_change_fields
    # If we don't support backfilling, then the create state machine may be using them.
    meth = desc.supports_backfill? ? :calculate_backfill_state_machine : :calculate_webhook_state_machine
  when "dependency_choice"
    # Choose an upstream dependency for an integration.
    # See where this is used for more details.
    meth = self.preferred_create_state_machine_method
    value = self._find_dependency_candidate(value)
    attr = "depends_on"
  when "noop_create"
    # Use this to just recalculate the state machine,
    # not make any changes to the data.
    return self.calculate_preferred_create_state_machine
  else
    raise ArgumentError, "Field '#{field}' is not valid for a state change"
  end
  # Persist the field and run the chosen state machine atomically,
  # so a failed recalculation does not leave a half-applied change.
  self.service_integration.db.transaction do
    self.service_integration.send(:"#{attr}=", value)
    self.service_integration.save_changes
    step = self.send(meth)
    if step.successful? && meth == :calculate_backfill_state_machine
      # If we are processing the backfill state machine, and we finish successfully,
      # we always want to start syncing.
      self._enqueue_backfill_jobs(incremental: true)
    end
    return step
  end
end
194
+
195
# If the integration supports webhooks, then we want to do that on create.
# If it's backfill only, then we fall back to that instead.
# Things like choosing dependencies are webhook-vs-backfill agnostic,
# so which machine we choose isn't that important (but it does happen during creation).
# @return [Symbol]
def preferred_create_state_machine_method
  if self.descriptor.supports_webhooks?
    return :calculate_webhook_state_machine
  end
  return :calculate_backfill_state_machine
end
203
+
204
# See +preferred_create_state_machine_method+.
# When backfill is preferred, go through +calculate_and_backfill_state_machine+
# (not just +calculate_backfill_state_machine+) so a successful step
# also enqueues backfill jobs.
# @return [Webhookdb::Replicator::StateMachineStep]
def calculate_preferred_create_state_machine
  if self.preferred_create_state_machine_method == :calculate_backfill_state_machine
    return self.calculate_and_backfill_state_machine(incremental: true)[0]
  end
  return self.calculate_webhook_state_machine
end
213
+
214
# Create a BackfillJob (recursively covering dependents by default)
# and optionally enqueue it for processing.
# @param incremental [Boolean] whether the backfill is incremental.
# @param criteria [Hash, nil] optional criteria stored on the job.
# @param recursive [Boolean] create jobs for dependent integrations too.
# @param enqueue [Boolean] enqueue the job after creating it.
# @return [Webhookdb::BackfillJob]
def _enqueue_backfill_jobs(incremental:, criteria: nil, recursive: true, enqueue: true)
  creator = recursive ? :create_recursive : :create
  job = Webhookdb::BackfillJob.send(
    creator,
    service_integration:,
    incremental:,
    criteria: criteria || {},
    created_by: Webhookdb.request_user_and_admin[0],
  )
  job.enqueue if enqueue
  return job
end
226
+
227
# Resolve a user-entered 1-based index into a dependency candidate.
# Blank input defaults to the first candidate.
# @param value [String]
# @raise [Webhookdb::InvalidPrecondition] when there are no candidates.
# @raise [Webhookdb::InvalidInput] when the index is out of range.
def _find_dependency_candidate(value)
  chosen = value.strip.blank? ? 1 : value.to_i
  idx = chosen - 1
  candidates = self.service_integration.dependency_candidates
  raise Webhookdb::InvalidPrecondition, "no dependency candidates" if candidates.empty?
  if idx.negative? || idx >= candidates.length
    raise Webhookdb::InvalidInput, "'#{value}' is not a valid dependency"
  end
  return candidates[idx]
end
237
+
238
# Return the state machine that is used when setting up this integration.
# Usually this entails providing the user the webhook url,
# and providing or asking for a webhook secret. In some cases,
# this can be a lot more complex though.
# Subclasses implement the service-specific steps.
#
# @abstract
# @return [Webhookdb::Replicator::StateMachineStep]
def calculate_webhook_state_machine
  raise NotImplementedError
end
248
+
249
# Return the state machine that is used when adding backfill support to an integration.
# Usually this sets one or both of the backfill key and secret.
# Subclasses implement the service-specific steps.
#
# @abstract
# @return [Webhookdb::Replicator::StateMachineStep]
def calculate_backfill_state_machine
  # This is a pure function that can be tested on its own--the endpoints just need to return a state machine step
  raise NotImplementedError
end
257
+
258
# Run +calculate_backfill_state_machine+,
# then create and enqueue a new BackfillJob if the step was successful.
# Returns a two-element array of the StateMachineStep and the BackfillJob.
# If the BackfillJob is present, the StateMachineStep was successful;
# otherwise no job was created and the second element is nil.
# @return [Array<Webhookdb::StateMachineStep, Webhookdb::BackfillJob>]
def calculate_and_backfill_state_machine(incremental:, criteria: nil, recursive: true, enqueue: true)
  step = self.calculate_backfill_state_machine
  job = step.successful? ? self._enqueue_backfill_jobs(incremental:, criteria:, recursive:, enqueue:) : nil
  return step, job
end
270
+
271
# When backfilling is not supported, this message is used.
# It can be overridden for custom explanations,
# or descriptor#documentation_url can be provided,
# which will use a default message.
# If no documentation is available, a fallback message is used.
# @return [String]
def backfill_not_supported_message
  du = self.documentation_url
  if du.blank?
    msg = %(Sorry, you cannot backfill this integration. You may be looking for one of the following:

  webhookdb integrations reset #{self.service_integration.table_name}
  )
    return msg
  end
  msg = %(Sorry, you cannot manually backfill this integration.
Please refer to the documentation at #{du}
for information on how to refresh data.)
  return msg
end
290
+
291
# Remove all the information used in the initial creation of the integration so that it can be re-entered.
def clear_webhook_information
  self._clear_webook_information
  # If we don't support both webhooks and backfilling, we are safe to clear ALL fields
  # and get back into an initial state.
  self._clear_backfill_information unless self.descriptor.supports_webhooks_and_backfill?
  self.service_integration.save_changes
end

# Blank out the webhook secret on the service integration (callers handle saving).
# NOTE(review): method name has a typo ('webook'); kept as-is since subclasses
# and siblings reference it by this name.
def _clear_webook_information
  self.service_integration.set(webhook_secret: "")
end

# Remove all the information needed for backfilling from the integration so that it can be re-entered.
def clear_backfill_information
  self._clear_backfill_information
  # If we don't support both webhooks and backfilling, we are safe to clear ALL fields
  # and get back into an initial state.
  self._clear_webook_information unless self.descriptor.supports_webhooks_and_backfill?
  self.service_integration.save_changes
end

# Blank out the backfill api url, key, and secret (callers handle saving).
def _clear_backfill_information
  self.service_integration.set(api_url: "", backfill_key: "", backfill_secret: "")
end
316
+
317
# Find a dependent service integration with the given service name.
# If none are found, return nil. If multiple are found, raise,
# as this should only be used for automatically managed integrations.
# @return [Webhookdb::ServiceIntegration,nil]
def find_dependent(service_name)
  matches = self.service_integration.dependents.select { |si| si.service_name == service_name }
  if matches.length > 1
    raise Webhookdb::InvalidPrecondition, "there are multiple #{service_name} integrations in dependents"
  end
  return matches.first
end

# Like +find_dependent+, but raise if no dependent integration matches.
# @return [Webhookdb::ServiceIntegration]
def find_dependent!(service_name)
  found = self.find_dependent(service_name)
  raise Webhookdb::InvalidPrecondition, "there is no #{service_name} integration in dependents" if found.nil?
  return found
end
334
+
335
# Whether the create-table modification should include an enrichment column
# for storing the enrichment body. Replicators that persist enrichments
# override this to return true.
def _store_enrichment_body? = false
340
+
341
# Create this replicator's table (and indices/sequence) by executing
# the modification from +create_table_modification+ against the admin connection.
def create_table(if_not_exists: false)
  cmd = self.create_table_modification(if_not_exists:)
  self.admin_dataset(timeout: :fast) do |ds|
    cmd.execute(ds.db)
  end
end

# Return the schema modification used to create the table where it does not exist.
# @return [Webhookdb::Replicator::SchemaModification]
def create_table_modification(if_not_exists: false)
  table = self.dbadapter_table
  columns = [self.primary_key_column, self.remote_key_column]
  columns.concat(self.storable_columns)
  # 'data' column should be last, since it's very large, we want to see other columns in psql/pgcli first
  columns << self.data_column
  adapter = Webhookdb::DBAdapter::PG.new
  result = Webhookdb::Replicator::SchemaModification.new
  result.transaction_statements << adapter.create_table_sql(table, columns, if_not_exists:)
  self.indices(table).each do |dbindex|
    result.transaction_statements << adapter.create_index_sql(dbindex, concurrently: false)
  end
  result.application_database_statements << self.service_integration.ensure_sequence_sql if self.requires_sequence?
  return result
end
365
+
366
# We need to give indices a persistent name, unique across the schema,
# since multiple indices within a schema cannot share a name.
#
# Note that in certain RDBMS (Postgres) index names cannot exceed a certain length;
# Postgres will silently truncate them. This can result in an index not being created
# if it shares the same name as another index and we use 'CREATE INDEX IF NOT EXISTS.'
#
# To avoid this, if the generated name exceeds a certain size, an md5 hash of the column names is used.
#
# @param columns [Array<Webhookdb::DBAdapter::Column, Webhookdb::Replicator::Column>] Must respond to :name.
# @return [String]
protected def index_name(columns)
  raise Webhookdb::InvalidPrecondition, "sint needs an opaque id" if self.service_integration.opaque_id.blank?
  colnames = columns.map(&:name).join("_")
  opaque_id = self.service_integration.opaque_id
  # Handle old IDs without the leading 'svi_'.
  # (SQL identifiers cannot start with a digit, so prefix them.)
  opaque_id = "idx#{opaque_id}" if /\d/.match?(opaque_id[0])
  name = "#{opaque_id}_#{colnames}_idx"
  if name.size > MAX_INDEX_NAME_LENGTH
    # We don't have the 32 extra chars for a full md5 hash.
    # We can't convert to Base64 or whatever, since we don't want to depend on case sensitivity.
    # So just lop off a few characters (normally 2) from the end of the md5.
    # The collision space is so small (some combination of column names would need to have the
    # same md5, which is unfathomable), we're not really worried about it.
    colnames_md5 = Digest::MD5.hexdigest(colnames)
    available_chars = MAX_INDEX_NAME_LENGTH - "#{opaque_id}__idx".size
    name = "#{opaque_id}_#{colnames_md5[...available_chars]}_idx"
  end
  # Invariant check against the Postgres identifier limit; should be unreachable
  # given the truncation above.
  raise Webhookdb::InvariantViolation, "index names cannot exceed 63 chars, got #{name.size} in '#{name}'" if
    name.size > 63
  return name
end

# Postgres identifier length limit (63 bytes) applied to generated index names.
MAX_INDEX_NAME_LENGTH = 63
400
+
401
# The surrogate primary key column (:pk, bigint) present on every replication table.
# @return [Webhookdb::DBAdapter::Column]
def primary_key_column
  return Webhookdb::DBAdapter::Column.new(name: :pk, type: BIGINT, pk: true)
end

# The replicator's unique remote key (+_remote_key_column+) as a DB adapter
# column with unique, non-null constraints.
# @return [Webhookdb::DBAdapter::Column]
def remote_key_column
  return self._remote_key_column.to_dbadapter(unique: true, nullable: false)
end

# The :data column storing the raw resource payload.
# @return [Webhookdb::DBAdapter::Column]
def data_column
  return Webhookdb::DBAdapter::Column.new(name: :data, type: OBJECT, nullable: false)
end

# Column used to store enrichments. Return nil if the service does not use enrichments.
# @return [Webhookdb::DBAdapter::Column,nil]
def enrichment_column
  return nil unless self._store_enrichment_body?
  return Webhookdb::DBAdapter::Column.new(name: :enrichment, type: OBJECT, nullable: true)
end

# The replicator-defined denormalized columns, converted to DB adapter columns.
# @return [Array<Webhookdb::DBAdapter::Column>]
def denormalized_columns
  return self._denormalized_columns.map(&:to_dbadapter)
end

# Specs for multi-column indices.
# Each column named in a spec must be in +denormalized_columns+.
# @return [Array<Webhookdb::Replicator::IndexSpec>]
def _extra_index_specs
  return []
end
434
+
435
# Denormalized columns, plus the enrichment column if supported.
# Does not include the data or external id columns, though perhaps it should.
# @return [Array<Webhookdb::DBAdapter::Column>]
def storable_columns
  base = self.denormalized_columns
  enrichment = self.enrichment_column
  return enrichment.nil? ? base : base + [enrichment]
end
445
+
446
# Column to use as the 'timestamp' for the row.
# This is usually some created or updated at timestamp.
# Raises NotImplementedError if +_timestamp_column_name+ does not name
# one of the denormalized columns.
# @return [Webhookdb::DBAdapter::Column]
def timestamp_column
  got = self._denormalized_columns.find { |c| c.name == self._timestamp_column_name }
  raise NotImplementedError, "#{self.descriptor.name} has no timestamp column #{self._timestamp_column_name}" if
    got.nil?
  return got.to_dbadapter
end

# The name of the timestamp column in the schema. This column is used primarily for conditional upserts
# (ie to know if a row has changed), but also as a general way of auditing changes.
# @abstract
# @return [Symbol]
def _timestamp_column_name
  raise NotImplementedError
end

# Each integration needs a single remote key, like the Shopify order id for shopify orders,
# or sid for Twilio resources. This column must be unique for the table, like a primary key.
#
# @abstract
# @return [Webhookdb::Replicator::Column]
def _remote_key_column
  raise NotImplementedError
end

# When an integration needs denormalized columns, specify them here.
# Indices are created for each column.
# Modifiers can be used if columns should have a default or whatever.
# See +Webhookdb::Replicator::Column+ for more details about column fields.
#
# @return [Array<Webhookdb::Replicator::Column>]
def _denormalized_columns
  return []
end
482
+
483
# Build the DB adapter indices for this table:
# one single-column index per storable column marked +index?+,
# plus one multi-column index per +_extra_index_specs+ entry.
# @return [Array<Webhookdb::DBAdapter::Index>]
def indices(table)
  dba_columns = [self.primary_key_column, self.remote_key_column]
  dba_columns.concat(self.storable_columns)
  dba_cols_by_name = dba_columns.index_by(&:name)

  result = []
  dba_columns.select(&:index?).each do |c|
    targets = [c]
    idx_name = self.index_name(targets)
    result << Webhookdb::DBAdapter::Index.new(name: idx_name.to_sym, table:, targets:, where: c.index_where)
  end
  self._extra_index_specs.each do |spec|
    # Spec columns are names; resolve them to the actual column objects
    # (fetch raises if a spec names a column that is not storable).
    targets = spec.columns.map { |n| dba_cols_by_name.fetch(n) }
    idx_name = self.index_name(targets)
    result << Webhookdb::DBAdapter::Index.new(name: idx_name.to_sym, table:, targets:, where: spec.where)
  end
  return result
end
502
+
503
# We support adding columns to existing integrations without having to bump the version;
# changing types, or removing/renaming columns, is not supported and should bump the version
# or must be handled out-of-band (like deleting the integration then backfilling).
# To figure out what columns we need to add, we can check what are currently defined,
# check what exists, and add denormalized columns and indices for those that are missing.
def ensure_all_columns
  modification = self.ensure_all_columns_modification
  return if modification.noop?
  self.admin_dataset(timeout: :slow_schema) do |ds|
    modification.execute(ds.db)
    # We need to clear cached columns on the dataset since we know we're adding more.
    # It's probably not a huge deal but may as well keep it in sync.
    ds.send(:clear_columns_cache)
  end
  self.readonly_dataset { |ds| ds.send(:clear_columns_cache) }
end
519
+
520
# Compute the SchemaModification needed to bring the replication table in line
# with the currently defined columns and indices.
# If the table does not exist, returns +create_table_modification+ instead.
# Otherwise: add missing denormalized/enrichment columns, backfill their values
# in pk-bounded chunks, then add missing indices.
# @return [Webhookdb::Replicator::SchemaModification]
def ensure_all_columns_modification
  existing_cols, existing_indices = nil
  max_pk = 0
  sint = self.service_integration
  self.admin_dataset do |ds|
    return self.create_table_modification unless ds.db.table_exists?(self.qualified_table_sequel_identifier)
    existing_cols = ds.columns.to_set
    existing_indices = ds.db[:pg_indexes].where(
      schemaname: sint.organization.replication_schema,
      tablename: sint.table_name,
    ).select_map(:indexname).to_set
    max_pk = ds.max(:pk) || 0
  end
  adapter = Webhookdb::DBAdapter::PG.new
  table = self.dbadapter_table
  result = Webhookdb::Replicator::SchemaModification.new

  # Use +reject+, NOT +delete_if+: +delete_if+ mutates the array returned by
  # +_denormalized_columns+, which a subclass may memoize or share.
  missing_columns = self._denormalized_columns.reject { |c| existing_cols.include?(c.name) }
  # Add missing columns
  missing_columns.each do |whcol|
    # Don't bother bulking the ADDs into a single ALTER TABLE, it won't really matter.
    result.transaction_statements << adapter.add_column_sql(table, whcol.to_dbadapter)
  end
  # Easier to handle this explicitly than use storable_columns, but it is a duplicated concept so be careful.
  if (enrich_col = self.enrichment_column) && !existing_cols.include?(enrich_col.name)
    result.transaction_statements << adapter.add_column_sql(table, enrich_col)
  end

  # Backfill values for new columns.
  if missing_columns.any?
    # We need to backfill values into the new column, but we don't want to lock the entire table
    # as we update each row. So we need to update in chunks of rows.
    # Chunk size should be large for speed (and sending over fewer queries), but small enough
    # to induce a viable delay if another query is updating the same row.
    # Note that the delay will only be for writes to those rows; reads will not block,
    # so something a bit longer should be ok.
    #
    # Note that at the point these UPDATEs are running, we have the new column AND the new code inserting
    # into that new column. We could in theory skip all the PKs that were added after this modification
    # started to run. However considering the number of rows in this window will always be relatively low
    # (though not absolutely low), and the SQL backfill operation should yield the same result
    # as the Ruby operation, this doesn't seem too important.
    result.nontransaction_statements.concat(missing_columns.filter_map(&:backfill_statement))
    update_expr = missing_columns.to_h { |c| [c.name, c.backfill_expr || c.to_sql_expr] }
    self.admin_dataset do |ds|
      chunks = Webhookdb::Replicator::Base.chunked_row_update_bounds(max_pk)
      chunks[...-1].each do |(lower, upper)|
        update_query = ds.where { pk > lower }.where { pk <= upper }.update_sql(update_expr)
        result.nontransaction_statements << update_query
      end
      # The last chunk is a single-element lower bound catching rows inserted
      # after max_pk was read.
      final_update_query = ds.where { pk > chunks[-1][0] }.update_sql(update_expr)
      result.nontransaction_statements << final_update_query
    end
  end

  # Add missing indices. This should happen AFTER the UPDATE calls so the UPDATEs don't have to update indices.
  # Use +each+, not +map+: the block is run for side effects and its result is unused.
  self.indices(table).each do |index|
    next if existing_indices.include?(index.name.to_s)
    result.nontransaction_statements << adapter.create_index_sql(index, concurrently: true)
  end

  result.application_database_statements << sint.ensure_sequence_sql if self.requires_sequence?
  return result
end
585
+
586
# Return an array of pk bounds used for splitting UPDATE queries so locks are not
# held on the entire table when backfilling values for newly added columns.
# See +ensure_all_columns_modification+.
#
# The returned chunks look like [[0, 100], [100, 200], [200]],
# and are meant to be used like `0 < pk <= 100`, `100 < pk <= 200`, `pk > 200`.
#
# Note the final element is a single-item array, used like `pk > chunks[-1][0]`:
# it catches rows written after the bounds were computed.
def self.chunked_row_update_bounds(max_pk, chunk_size: 1_000_000)
  bounds = []
  lower = 0
  # Emit full-width chunks until the next chunk would pass max_pk.
  # Eg with a chunk size of 100 and max_pk of 450, the last full chunk is [300, 400].
  while (upper = lower + chunk_size) <= max_pk
    bounds << [lower, upper]
    lower = upper
  end
  # Open-ended final chunk for everything beyond the last full chunk,
  # including rows inserted while the caller was working.
  bounds << [lower]
  return bounds
end
610
+
611
# Some integrations require sequences, like when upserting rows with numerical unique ids
# (if they were random values like UUIDs we could generate them and not use a sequence).
# In those cases, the integrations can mark themselves as requiring a sequence.
#
# The sequence will be created in the *application database*,
# but it is used primarily when inserting rows into the *organization/replication database*.
# This is necessary because things like sequences are not possible to migrate
# when moving replication databases.
def requires_sequence? = false
622
+
623
# A given HTTP request may not be handled by the service integration it was sent to,
# for example where the service integration is part of some 'root' hierarchy.
# This method is called in the webhook endpoint, and should return the replicator
# used to handle the webhook request. The request is validated by the returned instance,
# and it is enqueued for processing.
#
# By default, the service called by the webhook is the one we want to use,
# so return self.
#
# @param request [Rack::Request]
# @return [Webhookdb::Replicator::Base]
def dispatch_request_to(request) = self
637
+
638
# Upsert webhook using only a body.
# This is not valid for the rare integration which does not rely on request info,
# like when we have to take different action based on a request method.
#
# @param body [Hash]
def upsert_webhook_body(body, **kw)
  return self.upsert_webhook(Webhookdb::Replicator::WebhookRequest.new(body:), **kw)
end

# Upsert a webhook request into the database. Note this is a WebhookRequest,
# NOT a Rack::Request.
# Any error raised during the upsert is logged along with the request payload,
# then re-raised.
#
# @param [Webhookdb::Replicator::WebhookRequest] request
def upsert_webhook(request, **kw)
  return self._upsert_webhook(request, **kw)
rescue StandardError => e
  self.logger.error("upsert_webhook_error", request: request.as_json, error: e)
  raise
end
657
+
658
# Hook to be overridden, while still retaining
# top-level upsert_webhook functionality like error handling.
#
# Pipeline: extract resource/event, fetch optional enrichment, build the
# insert hash (denormalized columns + :data + optional :enrichment),
# then conditionally upsert and notify dependents/subscriptions.
#
# @param request [Webhookdb::Replicator::WebhookRequest]
# @param upsert [Boolean] If false, just return what would be upserted.
def _upsert_webhook(request, upsert: true)
  resource, event = self._resource_and_event(request)
  # A nil resource means the payload should be skipped entirely.
  return nil if resource.nil?
  enrichment = self._fetch_enrichment(resource, event, request)
  prepared = self._prepare_for_insert(resource, event, request, enrichment)
  # :data is reserved for the raw resource; _prepare_for_insert must not set it.
  raise Webhookdb::InvalidPostcondition if prepared.key?(:data)
  inserting = {}
  data_col_val = self._resource_to_data(resource, event, request, enrichment)
  inserting[:data] = self._to_json(data_col_val)
  inserting[:enrichment] = self._to_json(enrichment) if self._store_enrichment_body?
  inserting.merge!(prepared)
  return inserting unless upsert
  remote_key_col = self._remote_key_column
  updating = self._upsert_update_expr(inserting, enrichment:)
  update_where = self._update_where_expr
  upserted_rows = self.admin_dataset(timeout: :fast) do |ds|
    ds.insert_conflict(
      target: remote_key_col.name,
      update: updating,
      update_where:,
    ).insert(inserting)
  end
  # NOTE(review): presumably insert returns a blank value when the update_where
  # clause skipped the update (row unchanged) — confirm against Sequel docs.
  row_changed = upserted_rows.present?
  self._notify_dependents(inserting, row_changed)
  self._publish_rowupsert(inserting) if row_changed
  return inserting
end
690
+
691
# The NULL ASCII character (\u0000), when present in a string ("\u0000"),
# and then encoded into JSON ("\\u0000") is invalid in PG JSONB- its strings cannot contain NULLs
# (note that JSONB does not store the encoded string verbatim, it parses it into PG types, and a PG string
# cannot contain NULL since C strings are NULL-terminated).
#
# So we remove the "\\u0000" character from encoded JSON- for example, in the hash {x: "\u0000"},
# if we #to_json, we end up with '{"x":"\\u0000"}'. The removal of encoded NULL gives us '{"x":""}'.
#
# HOWEVER, if the encoded null is itself escaped, we MUST NOT remove it.
# For example, in the hash {x: "\u0000".to_json}.to_json (ie, a JSON string which contains another JSON string),
# we end up with '{"x":"\\\\u0000"}', That is, a string containing the *escaped* null character.
# This is valid for PG, because it's not a NULL- it's an escaped "\", followed by "u0000".
# If we were to remove the string "\\u0000", we'd end up with '{"x":"\\"}'. This creates an invalid document.
#
# So we remove only "\\u0000" by not replacing "\\\\u0000"- replace all occurrences of
# "<any one character except backslash>\\u0000" with "<character before backslash>".
def _to_json(v)
  return v.to_json.gsub(/(\\\\u0000|\\u0000)/, {"\\\\u0000" => "\\\\u0000", "\\u0000" => ""})
end
710
+
711
# Notify each dependent integration's replicator that a row was upserted here.
# @param changed [Boolean] True if the upsert modified the row.
def _notify_dependents(inserting, changed)
  self.service_integration.dependents.each do |d|
    d.replicator.on_dependency_webhook_upsert(self, inserting, changed:)
  end
end

# True if any webhook subscriptions on this integration need notifying.
def _any_subscriptions_to_notify?
  return !self.service_integration.all_webhook_subscriptions_dataset.to_notify.empty?
end

# Enqueue a SendWebhook job for +row+ so webhook subscriptions are dispatched.
# Skips the publish entirely when there are no subscriptions to notify
# (unless +check_for_subscriptions+ is false).
def _publish_rowupsert(row, check_for_subscriptions: true)
  return unless check_for_subscriptions && self._any_subscriptions_to_notify?
  payload = [
    self.service_integration.id,
    {
      row:,
      external_id_column: self._remote_key_column.name,
      external_id: row[self._remote_key_column.name],
    },
  ]
  # We AVOID pubsub here because we do NOT want to go through the router
  # and audit logger for this.
  event = Amigo::Event.create("webhookdb.serviceintegration.rowupsert", payload.as_json)
  Webhookdb::Jobs::SendWebhook.perform_async(event.as_json)
end
737
+
738
# Return true if the integration requires making an API call to upsert.
# This puts the sync into a lower-priority queue
# so it is less likely to block other processing.
# This is usually true if enrichments are involved.
# @return [Boolean]
def upsert_has_deps?
  return false
end

# Given the resource that is going to be inserted and an optional event,
# make an API call to enrich it with further data if needed.
# The result of this is passed to _prepare_for_insert.
# The base implementation performs no enrichment and returns nil.
#
# @param [Hash,nil] resource
# @param [Hash,nil] event
# @param [Webhookdb::Replicator::WebhookRequest] request
# @return [*]
def _fetch_enrichment(resource, event, request)
  return nil
end
758
+
759
# The argument for insert_conflict update_where clause.
# Used to conditionally update, like updating only if a row is newer than what's stored.
# We must always have an 'update where' because we never want to overwrite with the same data
# as exists.
#
# @example With a meaningful timestamp
#   self.qualified_table_sequel_identifier[:updated_at] < Sequel[:excluded][:updated_at]
#
# If an integration does not have any way to detect if a resource changed,
# it can compare data columns.
#
# @example Without a meaningful timestamp
#   self.qualified_table_sequel_identifier[:data] !~ Sequel[:excluded][:data]
#
# @abstract
# @return [Sequel::SQL::Expression]
def _update_where_expr
  raise NotImplementedError
end
778
+
779
# Given a webhook/backfill item payload,
# return the resource hash, and an optional event hash.
# If 'body' is the resource itself,
# this method returns [body, nil].
# If 'body' is an event,
# this method returns [body.resource-key, body].
# Columns can check for whether there is an event and/or body
# when converting.
#
# If this returns nil, the upsert is skipped.
#
# For example, a Stripe customer backfill upsert would be `{id: 'cus_123'}`
# when we backfill, but `{type: 'event', data: {id: 'cus_123'}}` when handling an event.
#
# @abstract
# @param [Webhookdb::Replicator::WebhookRequest] request
# @return [Array<Hash>,nil]
# @raise [NotImplementedError] Subclasses must implement.
def _resource_and_event(request)
  raise NotImplementedError
end
799
+
800
# Return the hash that should be inserted into the database,
# based on the denormalized columns and data given.
# Columns whose converted value is nil and that are marked +skip_nil?+
# are omitted from the result.
# @param [Hash,nil] resource
# @param [Hash,nil] event
# @param [Webhookdb::Replicator::WebhookRequest] request
# @param [Hash,nil] enrichment
# @return [Hash]
def _prepare_for_insert(resource, event, request, enrichment)
  result = {}
  ([self._remote_key_column] + self._denormalized_columns).each do |col|
    converted = col.to_ruby_value(resource:, event:, enrichment:, service_integration:)
    next if converted.nil? && col.skip_nil?
    result[col.name] = converted
  end
  return result
end
815
+
816
# Given the resource, return the value for the :data column.
# Only needed in rare situations where fields should be stored
# on the row, but not in :data.
# To skip :data column updates, return nil.
# The base implementation stores the resource as-is.
# @param [Hash,nil] resource
# @param [Hash,nil] event
# @param [Webhookdb::Replicator::WebhookRequest] request
# @param [Hash,nil] enrichment
# @return [Hash]
def _resource_to_data(resource, event, request, enrichment) = resource
828
+
829
# Given the hash that is passed to the Sequel insert
# (so contains all columns, including those from _prepare_for_insert),
# return the hash used for the insert_conflict(update:) keyword args.
#
# Rather than sending over the literal values in the inserting statement
# (which is pretty verbose, like the large 'data' column),
# make a smaller statement by using 'EXCLUDED':
# by default, every inserted column maps to `col = EXCLUDED.col`
# (the 'EXCLUDED' row being the one that failed to insert).
#
# This can be overridden when the service requires different values
# for inserting vs. updating, such as when a column's update value
# must use the EXCLUDED table in the upsert expression.
#
# Most commonly, the use case for this is when you want to provide a row a value,
# but ONLY on insert, OR on update ONLY if the column is nil.
# In that case, pass the result of this base method to
# +_coalesce_excluded_on_update+ (see also for more details).
#
# You can also use this method to merge :data columns together. For example:
# `super_result[:data] = Sequel.lit("#{self.service_integration.table_name}.data || excluded.data")`
def _upsert_update_expr(inserting, enrichment: nil)
  return inserting.keys.to_h { |c| [c, Sequel[:excluded][c]] }
end
855
+
856
# The string 'null' in a json column still represents 'null' but we'd rather have an actual NULL value,
# represented by 'nil'. So, return nil if the arg is nil (so we get NULL),
# otherwise return the argument JSON-encoded.
protected def _nil_or_json(x)
  return x.nil? ? nil : x.to_json
end
862
+
863
# Have a column set itself only on insert or if nil.
#
# Given the payload to DO UPDATE, mutate it so that
# the column names included in 'column_names' use what is already in the table,
# and fall back to what's being inserted.
# This new payload should be passed to the `update` kwarg of `insert_conflict`:
#
#   ds.insert_conflict(update: self._coalesce_excluded_on_update(payload, :created_at)).insert(payload)
#
# @param update [Hash] The update-expression hash; mutated in place.
# @param column_names [Array<Symbol>]
def _coalesce_excluded_on_update(update, column_names)
  # Now replace just the specific columns we're overriding:
  # COALESCE(existing value, EXCLUDED value) keeps a non-nil stored value.
  column_names.each do |c|
    update[c] = Sequel.function(:coalesce, self.qualified_table_sequel_identifier[c], Sequel[:excluded][c])
  end
end
880
+
881
# Yield to a dataset using the admin connection.
# @return [Sequel::Dataset]
def admin_dataset(**kw, &)
  self.with_dataset(self.service_integration.organization.admin_connection_url_raw, **kw, &)
end

# Yield to a dataset using the readonly connection.
# @return [Sequel::Dataset]
def readonly_dataset(**kw, &)
  self.with_dataset(self.service_integration.organization.readonly_connection_url_raw, **kw, &)
end

# Borrow a cached connection to +url+ and yield a dataset scoped to this
# replicator's qualified table. Raises LocalJumpError if no block is given.
protected def with_dataset(url, **kw, &block)
  raise LocalJumpError if block.nil?
  Webhookdb::ConnectionCache.borrow(url, **kw) do |conn|
    yield(conn[self.qualified_table_sequel_identifier])
  end
end
899
+
900
# Run the given block with a (try) advisory lock taken on a combination of:
#
# - The table OID for this replicator
# - The given key
#
# Note that this establishes a new DB connection for the advisory lock;
# we have had issues with advisory locks on reused connections,
# and this is safer than having a lock that is never released.
#
# Returns the block's result; +got+ stays nil when the block did not run
# (ie, the try-lock was not acquired).
protected def with_advisory_lock(key, &)
  url = self.service_integration.organization.admin_connection_url_raw
  got = nil
  Webhookdb::Dbutil.borrow_conn(url) do |conn|
    # Resolve this replicator's table to its OID to use as the lock namespace.
    table_oid = conn.select(
      Sequel.function(:to_regclass, self.schema_and_table_symbols.join(".")).cast(:oid).as(:table_id),
    ).first[:table_id]
    self.logger.debug("taking_replicator_advisory_lock", table_oid:, key_id: key)
    Sequel::AdvisoryLock.new(conn, table_oid, key).with_lock? do
      got = yield
    end
  end
  return got
end
922
+
923
# Some replicators support 'instant sync', because they are upserted en-masse
# rather than row-by-row. That is, usually we run sync targets on a cron,
# because otherwise we'd need to run the sync target for every row.
# But if inserting is always done through backfilling,
# we know we have a useful set of results to sync, so don't need to wait for cron.
# Enqueues one SyncTargetRunSync job per sync target on this integration.
def enqueue_sync_targets
  self.service_integration.sync_targets.each do |stgt|
    Webhookdb::Jobs::SyncTargetRunSync.perform_async(stgt.id)
  end
end
933
+
934
# Result of +verify_backfill_credentials+:
# +verified+ is true when the credentials appear usable;
# +message+ carries the user-facing error text when verification failed.
class CredentialVerificationResult < Webhookdb::TypedStruct
  attr_reader :verified, :message
end
937
+
938
# Try to verify backfill credentials, by fetching the first page of items.
# Only relevant for integrations supporting backfilling.
#
# If an error is received, return `_verify_backfill_<http status>_err_msg`
# as the error message, if defined. So for example, a 401 will call the method
# +_verify_backfill_401_err_msg+ if defined. If such a method is not defined,
# call and return +_verify_backfill_err_msg+.
#
# @return [CredentialVerificationResult]
def verify_backfill_credentials
  backfiller = self._backfillers.first
  if backfiller.nil?
    # If for some reason we do not have a backfiller,
    # we can't verify credentials. This should never happen in practice,
    # because we wouldn't call this method if the integration doesn't support it.
    raise "No backfiller available for #{self.service_integration.inspect}"
  end
  begin
    # Begin a backfill attempt but do not return the backfill result.
    backfiller.fetch_backfill_page(nil, last_backfilled: nil)
  rescue Webhookdb::Http::Error => e
    # Prefer a status-specific message method if the replicator defines one.
    msg = if self.respond_to?(:"_verify_backfill_#{e.status}_err_msg")
            self.send(:"_verify_backfill_#{e.status}_err_msg")
    else
      self._verify_backfill_err_msg
    end
    return CredentialVerificationResult.new(verified: false, message: msg)
  rescue TypeError, NoMethodError => e
    # If we don't incur an HTTP error, but do incur an error due to differences in the shapes of anticipated
    # response data in the `fetch_backfill_page` function, we can assume that the credentials are okay,
    # and fall through to the shared success result below.
    self.logger.info "verify_backfill_credentials_expected_failure", error: e
  end
  # Single success construction (was previously duplicated in the rescue and here).
  return CredentialVerificationResult.new(verified: true, message: "")
end
973
+
974
# Fallback error message used by +verify_backfill_credentials+ when no
# status-specific +_verify_backfill_<status>_err_msg+ method is defined.
# Integrations supporting backfill must override this.
def _verify_backfill_err_msg
  raise NotImplementedError, "each integration must provide an error message for unanticipated errors"
end
977
+
978
# URL of external documentation for this replicator; nil when none exists.
def documentation_url
  return nil
end
979
+
980
# In order to backfill, we need to:
# - Iterate through pages of records from the external service
# - Upsert each record
# The caveats/complexities are:
# - The backfill method should take care of retrying fetches for failed pages.
# - That means it needs to keep track of some pagination token.
#
# @param job [Webhookdb::BackfillJob]
# @raise [Webhookdb::InvalidPrecondition] if the job belongs to a different integration.
# @raise [Webhookdb::InvariantViolation] if this replicator does not support backfill.
# @raise [Webhookdb::Replicator::CredentialsMissing] if no credentials nor dependency are set.
def backfill(job)
  raise Webhookdb::InvalidPrecondition, "job is for different service integration" unless
    job.service_integration === self.service_integration

  raise Webhookdb::InvariantViolation, "manual backfill not supported" unless self.descriptor.supports_backfill?

  sint = self.service_integration
  raise Webhookdb::Replicator::CredentialsMissing if
    sint.backfill_key.blank? && sint.backfill_secret.blank? && sint.depends_on.blank?
  # Incremental jobs only need rows changed since the last run; full jobs re-fetch everything.
  last_backfilled = job.incremental? ? sint.last_backfilled_at : nil
  # Capture the cutoff before starting, so rows created while the backfill runs
  # are picked up by the next incremental run rather than skipped.
  new_last_backfilled = Time.now
  job.update(started_at: Time.now)

  backfillers = self._backfillers(**job.criteria.symbolize_keys)
  if self._parallel_backfill && self._parallel_backfill > 1
    self._run_backfillers_concurrently(backfillers, last_backfilled)
  else
    backfillers.each do |backfiller|
      backfiller.backfill(last_backfilled)
    end
  end

  sint.update(last_backfilled_at: new_last_backfilled) if job.incremental?
  job.update(finished_at: Time.now)
  job.enqueue_children
end

# Run +backfillers+ on a dedicated thread pool with parallelism +_parallel_backfill+.
# Errors raised inside pool tasks would otherwise be swallowed, so they are
# recorded per-backfiller; on the first error, stop scheduling more work,
# wait for in-flight backfillers, and re-raise.
#
# @param backfillers [Array<Webhookdb::Backfiller>]
# @param last_backfilled [Time, nil] incremental cutoff passed to each backfiller.
protected def _run_backfillers_concurrently(backfillers, last_backfilled)
  # Create a dedicated threadpool for these backfillers,
  # with max parallelism determined by the replicator.
  pool = Concurrent::FixedThreadPool.new(self._parallel_backfill)
  # Record any errors that occur, since they won't raise otherwise.
  # Initialize a sized array to avoid any potential race conditions (though GIL should make it not an issue?).
  errors = Array.new(backfillers.size)
  backfillers.each_with_index do |bf, idx|
    pool.post do
      bf.backfill(last_backfilled)
    rescue StandardError => e
      errors[idx] = e
    end
  end
  # We've enqueued all backfillers; do not accept anymore work.
  pool.shutdown
  loop do
    # We want to stop early if we find an error, so check for errors every 10 seconds.
    completed = pool.wait_for_termination(10)
    first_error = errors.find { |e| !e.nil? }
    if first_error.nil?
      # No error, and wait_for_termination returned true, so all work is done.
      break if completed
      # No error, but work is still going on, so loop again.
      next
    end
    # We have an error; don't run any more backfillers.
    pool.kill
    # Wait for all ongoing backfills before raising.
    pool.wait_for_termination
    raise first_error
  end
end
1043
+
1044
# If this replicator supports backfilling in parallel (running multiple backfillers at a time),
# return the degree of parallelism (or nil if not running in parallel).
# We leave parallelism up to the replicator, not CPU count, since most work
# involves waiting on APIs to return.
#
# NOTE: These threads are in addition to any worker threads, so it's important
# to pay attention to memory use.
def _parallel_backfill = nil
1054
+
1055
# Return backfillers for the replicator.
# We must use an array for 'data-based' backfillers,
# like when we need to paginate for each row in another table.
#
# By default, return a ServiceBackfiller,
# which will call _fetch_backfill_page on the receiver.
#
# @return [Array<Webhookdb::Backfiller>]
def _backfillers
  [ServiceBackfiller.new(self)]
end
1066
+
1067
# Basic backfiller that calls +_fetch_backfill_page+ on the given replicator.
# Any timeouts or 5xx errors are automatically re-enqueued for a retry.
# This behavior can be customized somewhat setting :backfiller_server_error_retries (default to 2)
# and :backfiller_server_error_backoff on the replicator (default to 63 seconds),
# though customization beyond that should use a custom backfiller.
class ServiceBackfiller < Webhookdb::Backfiller
  # @!attribute svc
  #   @return [Webhookdb::Replicator::Base]
  attr_reader :svc

  attr_accessor :server_error_retries, :server_error_backoff

  def initialize(svc)
    @svc = svc
    # Pull retry tuning from the replicator when it defines these hooks.
    @server_error_retries = _getifrespondto(:backfiller_server_error_retries, 2)
    @server_error_backoff = _getifrespondto(:backfiller_server_error_backoff, 63.seconds)
    raise "#{svc} must implement :_fetch_backfill_page" unless svc.respond_to?(:_fetch_backfill_page)
    super()
  end

  # Read +sym+ from the replicator if it responds to it, otherwise +default+.
  private def _getifrespondto(sym, default)
    @svc.respond_to?(sym) ? @svc.send(sym) : default
  end

  # Upsert a single fetched item through the replicator.
  def handle_item(item)
    @svc.upsert_webhook_body(item)
  end

  # Delegate paging to the replicator, converting timeouts/socket errors and
  # 5xx responses into bounded retries; other HTTP errors propagate.
  def fetch_backfill_page(pagination_token, last_backfilled:)
    @svc._fetch_backfill_page(pagination_token, last_backfilled:)
  rescue ::Timeout::Error, ::SocketError
    self.__retryordie
  rescue Webhookdb::Http::Error => e
    self.__retryordie if e.status >= 500
    raise
  end

  # Re-enqueue with the configured retry count and backoff.
  def __retryordie
    raise Amigo::Retry::OrDie.new(self.server_error_retries, self.server_error_backoff)
  end
end
1109
+
1110
# Called when the upstream dependency upserts. In most cases, you can noop;
# but in some cases, you may want to update or fetch rows.
# One example would be a 'db only' integration, where values are taken from the parent service
# and added to this service's table. We may want to upsert rows in our table
# whenever a row in our parent table changes.
#
# @param replicator [Webhookdb::Replicator::Base] the parent replicator that upserted.
# @param payload [Hash] the upserted row payload.
# @param changed [Boolean] whether the parent row actually changed.
# @raise [NotImplementedError] unless overridden by a dependent replicator.
def on_dependency_webhook_upsert(replicator, payload, changed:)
  raise NotImplementedError, "this must be overridden for replicators that have dependencies"
end
1122
+
1123
# Build the state machine step that prompts the user to choose a parent
# (dependency) integration for this replicator.
#
# @param dependency_help [String] extra help text shown in the prompt; blank to omit.
# @return [Webhookdb::Replicator::StateMachineStep, nil] nil when a dependency is already chosen.
# @raise [Webhookdb::InvalidPrecondition] if this replicator's descriptor declares no dependency.
def calculate_dependency_state_machine_step(dependency_help:)
  raise Webhookdb::InvalidPrecondition, "#{self.descriptor.name} does not have a dependency" if
    self.class.descriptor.dependency_descriptor.nil?
  # Already linked to a parent integration; nothing to prompt for.
  return nil if self.service_integration.depends_on_id
  step = Webhookdb::Replicator::StateMachineStep.new
  dep_descr = self.descriptor.dependency_descriptor
  candidates = self.service_integration.dependency_candidates
  if candidates.empty?
    # No eligible parent integrations exist yet: tell the user how to create one,
    # and mark the step completed (there is nothing to choose).
    step.output = %(This integration requires #{dep_descr.resource_name_plural} to sync.

You don't have any #{dep_descr.resource_name_singular} integrations yet. You can run:

  webhookdb integrations create #{dep_descr.name}

to set one up. Then once that's complete, you can re-run:

  webhookdb integrations create #{self.descriptor.name}

to keep going.
)
    step.error_code = "no_candidate_dependency"
    return step.completed
  end
  # Render a 1-based numbered menu of candidate parent integrations.
  choice_lines = candidates.each_with_index.
    map { |si, idx| "#{idx + 1} - #{si.table_name}" }.
    join("\n")
  step.output = %(This integration requires #{dep_descr.resource_name_plural} to sync.
#{dependency_help.blank? ? '' : "\n#{dependency_help}\n"}
Enter the number for the #{dep_descr.resource_name_singular} integration you want to use,
or leave blank to choose the first option.

#{choice_lines}
)
  step.prompting("Parent integration number")
  # The chosen number is POSTed back to this transition endpoint.
  step.post_to_url = self.service_integration.authed_api_path + "/transition/dependency_choice"
  return step
end
1160
+
1161
# The endpoint external services should deliver webhooks to.
# Delegates to +_webhook_endpoint+ so subclasses can customize routing.
def webhook_endpoint = self._webhook_endpoint
1164
+
1165
# Default webhook endpoint: the integration's unauthed webhook URL.
protected def _webhook_endpoint = self.service_integration.unauthed_webhook_endpoint
1168
+
1169
# The CLI command a user runs to trigger a manual backfill of this integration.
protected def _backfill_command
  "webhookdb backfill #{self.service_integration.opaque_id}"
end
1172
+
1173
# Render the standard "how to query this table" help text shown at the end of
# setup state machines, covering both psql and the CLI.
#
# @param prefix [String] leading sentence fragment for the first line.
# @return [String] multi-line help text interpolating the org connection URL and table name.
protected def _query_help_output(prefix: "You can query the table")
  sint = self.service_integration
  return %(#{prefix} through your organization's Postgres connection string:

  psql #{sint.organization.readonly_connection_url}
  > SELECT * FROM #{sint.table_name}

You can also run a query through the CLI:

  webhookdb db sql "SELECT * FROM #{sint.table_name}"
)
end
1185
+ end