webhookdb 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (364)
  1. checksums.yaml +7 -0
  2. data/data/messages/layouts/blank.email.liquid +10 -0
  3. data/data/messages/layouts/minimal.email.liquid +28 -0
  4. data/data/messages/layouts/standard.email.liquid +28 -0
  5. data/data/messages/partials/button.liquid +15 -0
  6. data/data/messages/partials/environment_banner.liquid +9 -0
  7. data/data/messages/partials/footer.liquid +22 -0
  8. data/data/messages/partials/greeting.liquid +3 -0
  9. data/data/messages/partials/logo_header.liquid +18 -0
  10. data/data/messages/partials/signoff.liquid +1 -0
  11. data/data/messages/styles/v1.liquid +346 -0
  12. data/data/messages/templates/errors/icalendar_fetch.email.liquid +29 -0
  13. data/data/messages/templates/invite.email.liquid +15 -0
  14. data/data/messages/templates/new_customer.email.liquid +24 -0
  15. data/data/messages/templates/org_database_migration_finished.email.liquid +7 -0
  16. data/data/messages/templates/org_database_migration_started.email.liquid +9 -0
  17. data/data/messages/templates/specs/_field_partial.liquid +1 -0
  18. data/data/messages/templates/specs/basic.email.liquid +2 -0
  19. data/data/messages/templates/specs/basic.fake.liquid +1 -0
  20. data/data/messages/templates/specs/with_field.email.liquid +2 -0
  21. data/data/messages/templates/specs/with_field.fake.liquid +1 -0
  22. data/data/messages/templates/specs/with_include.email.liquid +2 -0
  23. data/data/messages/templates/specs/with_partial.email.liquid +1 -0
  24. data/data/messages/templates/verification.email.liquid +14 -0
  25. data/data/messages/templates/verification.sms.liquid +1 -0
  26. data/data/messages/web/install-customer-login.liquid +48 -0
  27. data/data/messages/web/install-error.liquid +17 -0
  28. data/data/messages/web/install-success.liquid +35 -0
  29. data/data/messages/web/install.liquid +20 -0
  30. data/data/messages/web/partials/footer.liquid +4 -0
  31. data/data/messages/web/partials/form_error.liquid +1 -0
  32. data/data/messages/web/partials/header.liquid +3 -0
  33. data/data/messages/web/styles.liquid +134 -0
  34. data/data/windows_tz.txt +461 -0
  35. data/db/migrations/001_testing_pixies.rb +13 -0
  36. data/db/migrations/002_initial.rb +132 -0
  37. data/db/migrations/003_ux_overhaul.rb +20 -0
  38. data/db/migrations/004_incremental_backfill.rb +9 -0
  39. data/db/migrations/005_log_webhooks.rb +24 -0
  40. data/db/migrations/006_generalize_roles.rb +29 -0
  41. data/db/migrations/007_org_dns.rb +12 -0
  42. data/db/migrations/008_webhook_subscriptions.rb +19 -0
  43. data/db/migrations/009_nonunique_stripe_subscription_customer.rb +16 -0
  44. data/db/migrations/010_drop_integration_soft_delete.rb +14 -0
  45. data/db/migrations/011_webhook_subscriptions_created_at.rb +10 -0
  46. data/db/migrations/012_webhook_subscriptions_created_by.rb +9 -0
  47. data/db/migrations/013_default_org_membership.rb +30 -0
  48. data/db/migrations/014_webhook_subscription_deliveries.rb +26 -0
  49. data/db/migrations/015_dependent_integrations.rb +9 -0
  50. data/db/migrations/016_encrypted_columns.rb +9 -0
  51. data/db/migrations/017_skip_verification.rb +9 -0
  52. data/db/migrations/018_sync_targets.rb +25 -0
  53. data/db/migrations/019_org_schema.rb +9 -0
  54. data/db/migrations/020_org_database_migrations.rb +25 -0
  55. data/db/migrations/021_no_default_org_schema.rb +14 -0
  56. data/db/migrations/022_database_document.rb +15 -0
  57. data/db/migrations/023_sync_target_schema.rb +9 -0
  58. data/db/migrations/024_org_semaphore_jobs.rb +9 -0
  59. data/db/migrations/025_integration_backfill_cursor.rb +9 -0
  60. data/db/migrations/026_undo_integration_backfill_cursor.rb +9 -0
  61. data/db/migrations/027_sync_target_http_sync.rb +12 -0
  62. data/db/migrations/028_logged_webhook_path.rb +24 -0
  63. data/db/migrations/029_encrypt_columns.rb +97 -0
  64. data/db/migrations/030_org_sync_target_timeout.rb +9 -0
  65. data/db/migrations/031_org_max_query_rows.rb +9 -0
  66. data/db/migrations/032_remove_db_defaults.rb +12 -0
  67. data/db/migrations/033_backfill_jobs.rb +26 -0
  68. data/db/migrations/034_backfill_job_criteria.rb +9 -0
  69. data/db/migrations/035_synchronous_backfill.rb +9 -0
  70. data/db/migrations/036_oauth.rb +26 -0
  71. data/db/migrations/037_oauth_used.rb +9 -0
  72. data/lib/amigo/durable_job.rb +416 -0
  73. data/lib/pry/clipboard.rb +111 -0
  74. data/lib/sequel/advisory_lock.rb +65 -0
  75. data/lib/webhookdb/admin.rb +4 -0
  76. data/lib/webhookdb/admin_api/auth.rb +36 -0
  77. data/lib/webhookdb/admin_api/customers.rb +63 -0
  78. data/lib/webhookdb/admin_api/database_documents.rb +20 -0
  79. data/lib/webhookdb/admin_api/entities.rb +66 -0
  80. data/lib/webhookdb/admin_api/message_deliveries.rb +61 -0
  81. data/lib/webhookdb/admin_api/roles.rb +15 -0
  82. data/lib/webhookdb/admin_api.rb +34 -0
  83. data/lib/webhookdb/aggregate_result.rb +63 -0
  84. data/lib/webhookdb/api/auth.rb +122 -0
  85. data/lib/webhookdb/api/connstr_auth.rb +36 -0
  86. data/lib/webhookdb/api/db.rb +188 -0
  87. data/lib/webhookdb/api/demo.rb +14 -0
  88. data/lib/webhookdb/api/entities.rb +198 -0
  89. data/lib/webhookdb/api/helpers.rb +253 -0
  90. data/lib/webhookdb/api/install.rb +296 -0
  91. data/lib/webhookdb/api/me.rb +53 -0
  92. data/lib/webhookdb/api/organizations.rb +254 -0
  93. data/lib/webhookdb/api/replay.rb +64 -0
  94. data/lib/webhookdb/api/service_integrations.rb +402 -0
  95. data/lib/webhookdb/api/services.rb +27 -0
  96. data/lib/webhookdb/api/stripe.rb +22 -0
  97. data/lib/webhookdb/api/subscriptions.rb +67 -0
  98. data/lib/webhookdb/api/sync_targets.rb +232 -0
  99. data/lib/webhookdb/api/system.rb +37 -0
  100. data/lib/webhookdb/api/webhook_subscriptions.rb +96 -0
  101. data/lib/webhookdb/api.rb +92 -0
  102. data/lib/webhookdb/apps.rb +93 -0
  103. data/lib/webhookdb/async/audit_logger.rb +38 -0
  104. data/lib/webhookdb/async/autoscaler.rb +84 -0
  105. data/lib/webhookdb/async/job.rb +18 -0
  106. data/lib/webhookdb/async/job_logger.rb +45 -0
  107. data/lib/webhookdb/async/scheduled_job.rb +18 -0
  108. data/lib/webhookdb/async.rb +142 -0
  109. data/lib/webhookdb/aws.rb +98 -0
  110. data/lib/webhookdb/backfill_job.rb +107 -0
  111. data/lib/webhookdb/backfiller.rb +107 -0
  112. data/lib/webhookdb/cloudflare.rb +39 -0
  113. data/lib/webhookdb/connection_cache.rb +177 -0
  114. data/lib/webhookdb/console.rb +71 -0
  115. data/lib/webhookdb/convertkit.rb +14 -0
  116. data/lib/webhookdb/crypto.rb +66 -0
  117. data/lib/webhookdb/customer/reset_code.rb +94 -0
  118. data/lib/webhookdb/customer.rb +347 -0
  119. data/lib/webhookdb/database_document.rb +72 -0
  120. data/lib/webhookdb/db_adapter/column_types.rb +37 -0
  121. data/lib/webhookdb/db_adapter/default_sql.rb +187 -0
  122. data/lib/webhookdb/db_adapter/pg.rb +96 -0
  123. data/lib/webhookdb/db_adapter/snowflake.rb +137 -0
  124. data/lib/webhookdb/db_adapter.rb +208 -0
  125. data/lib/webhookdb/dbutil.rb +92 -0
  126. data/lib/webhookdb/demo_mode.rb +100 -0
  127. data/lib/webhookdb/developer_alert.rb +51 -0
  128. data/lib/webhookdb/email_octopus.rb +21 -0
  129. data/lib/webhookdb/enumerable.rb +18 -0
  130. data/lib/webhookdb/fixtures/backfill_jobs.rb +72 -0
  131. data/lib/webhookdb/fixtures/customers.rb +65 -0
  132. data/lib/webhookdb/fixtures/database_documents.rb +27 -0
  133. data/lib/webhookdb/fixtures/faker.rb +41 -0
  134. data/lib/webhookdb/fixtures/logged_webhooks.rb +56 -0
  135. data/lib/webhookdb/fixtures/message_deliveries.rb +59 -0
  136. data/lib/webhookdb/fixtures/oauth_sessions.rb +24 -0
  137. data/lib/webhookdb/fixtures/organization_database_migrations.rb +37 -0
  138. data/lib/webhookdb/fixtures/organization_memberships.rb +54 -0
  139. data/lib/webhookdb/fixtures/organizations.rb +32 -0
  140. data/lib/webhookdb/fixtures/reset_codes.rb +23 -0
  141. data/lib/webhookdb/fixtures/service_integrations.rb +42 -0
  142. data/lib/webhookdb/fixtures/subscriptions.rb +33 -0
  143. data/lib/webhookdb/fixtures/sync_targets.rb +32 -0
  144. data/lib/webhookdb/fixtures/webhook_subscriptions.rb +35 -0
  145. data/lib/webhookdb/fixtures.rb +15 -0
  146. data/lib/webhookdb/formatting.rb +56 -0
  147. data/lib/webhookdb/front.rb +49 -0
  148. data/lib/webhookdb/github.rb +22 -0
  149. data/lib/webhookdb/google_calendar.rb +29 -0
  150. data/lib/webhookdb/heroku.rb +21 -0
  151. data/lib/webhookdb/http.rb +114 -0
  152. data/lib/webhookdb/icalendar.rb +17 -0
  153. data/lib/webhookdb/id.rb +17 -0
  154. data/lib/webhookdb/idempotency.rb +90 -0
  155. data/lib/webhookdb/increase.rb +42 -0
  156. data/lib/webhookdb/intercom.rb +23 -0
  157. data/lib/webhookdb/jobs/amigo_test_jobs.rb +118 -0
  158. data/lib/webhookdb/jobs/backfill.rb +32 -0
  159. data/lib/webhookdb/jobs/create_mirror_table.rb +18 -0
  160. data/lib/webhookdb/jobs/create_stripe_customer.rb +17 -0
  161. data/lib/webhookdb/jobs/customer_created_notify_internal.rb +22 -0
  162. data/lib/webhookdb/jobs/demo_mode_sync_data.rb +19 -0
  163. data/lib/webhookdb/jobs/deprecated_jobs.rb +19 -0
  164. data/lib/webhookdb/jobs/developer_alert_handle.rb +14 -0
  165. data/lib/webhookdb/jobs/durable_job_recheck_poller.rb +17 -0
  166. data/lib/webhookdb/jobs/emailer.rb +15 -0
  167. data/lib/webhookdb/jobs/icalendar_enqueue_syncs.rb +25 -0
  168. data/lib/webhookdb/jobs/icalendar_sync.rb +23 -0
  169. data/lib/webhookdb/jobs/logged_webhook_replay.rb +17 -0
  170. data/lib/webhookdb/jobs/logged_webhook_resilient_replay.rb +15 -0
  171. data/lib/webhookdb/jobs/message_dispatched.rb +16 -0
  172. data/lib/webhookdb/jobs/organization_database_migration_notify_finished.rb +21 -0
  173. data/lib/webhookdb/jobs/organization_database_migration_notify_started.rb +21 -0
  174. data/lib/webhookdb/jobs/organization_database_migration_run.rb +24 -0
  175. data/lib/webhookdb/jobs/prepare_database_connections.rb +22 -0
  176. data/lib/webhookdb/jobs/process_webhook.rb +47 -0
  177. data/lib/webhookdb/jobs/renew_watch_channel.rb +24 -0
  178. data/lib/webhookdb/jobs/replication_migration.rb +24 -0
  179. data/lib/webhookdb/jobs/reset_code_create_dispatch.rb +23 -0
  180. data/lib/webhookdb/jobs/scheduled_backfills.rb +77 -0
  181. data/lib/webhookdb/jobs/send_invite.rb +15 -0
  182. data/lib/webhookdb/jobs/send_test_webhook.rb +25 -0
  183. data/lib/webhookdb/jobs/send_webhook.rb +20 -0
  184. data/lib/webhookdb/jobs/sync_target_enqueue_scheduled.rb +16 -0
  185. data/lib/webhookdb/jobs/sync_target_run_sync.rb +38 -0
  186. data/lib/webhookdb/jobs/trim_logged_webhooks.rb +15 -0
  187. data/lib/webhookdb/jobs/webhook_resource_notify_integrations.rb +30 -0
  188. data/lib/webhookdb/jobs/webhook_subscription_delivery_attempt.rb +29 -0
  189. data/lib/webhookdb/jobs.rb +4 -0
  190. data/lib/webhookdb/json.rb +113 -0
  191. data/lib/webhookdb/liquid/expose.rb +27 -0
  192. data/lib/webhookdb/liquid/filters.rb +16 -0
  193. data/lib/webhookdb/liquid/liquification.rb +26 -0
  194. data/lib/webhookdb/liquid/partial.rb +12 -0
  195. data/lib/webhookdb/logged_webhook/resilient.rb +95 -0
  196. data/lib/webhookdb/logged_webhook.rb +194 -0
  197. data/lib/webhookdb/message/body.rb +25 -0
  198. data/lib/webhookdb/message/delivery.rb +127 -0
  199. data/lib/webhookdb/message/email_transport.rb +133 -0
  200. data/lib/webhookdb/message/fake_transport.rb +54 -0
  201. data/lib/webhookdb/message/liquid_drops.rb +29 -0
  202. data/lib/webhookdb/message/template.rb +89 -0
  203. data/lib/webhookdb/message/transport.rb +43 -0
  204. data/lib/webhookdb/message.rb +150 -0
  205. data/lib/webhookdb/messages/error_icalendar_fetch.rb +42 -0
  206. data/lib/webhookdb/messages/invite.rb +23 -0
  207. data/lib/webhookdb/messages/new_customer.rb +14 -0
  208. data/lib/webhookdb/messages/org_database_migration_finished.rb +23 -0
  209. data/lib/webhookdb/messages/org_database_migration_started.rb +24 -0
  210. data/lib/webhookdb/messages/specs.rb +57 -0
  211. data/lib/webhookdb/messages/verification.rb +23 -0
  212. data/lib/webhookdb/method_utilities.rb +82 -0
  213. data/lib/webhookdb/microsoft_calendar.rb +36 -0
  214. data/lib/webhookdb/nextpax.rb +14 -0
  215. data/lib/webhookdb/oauth/front.rb +58 -0
  216. data/lib/webhookdb/oauth/intercom.rb +58 -0
  217. data/lib/webhookdb/oauth/session.rb +24 -0
  218. data/lib/webhookdb/oauth.rb +80 -0
  219. data/lib/webhookdb/organization/alerting.rb +35 -0
  220. data/lib/webhookdb/organization/database_migration.rb +151 -0
  221. data/lib/webhookdb/organization/db_builder.rb +429 -0
  222. data/lib/webhookdb/organization.rb +506 -0
  223. data/lib/webhookdb/organization_membership.rb +58 -0
  224. data/lib/webhookdb/phone_number.rb +38 -0
  225. data/lib/webhookdb/plaid.rb +23 -0
  226. data/lib/webhookdb/platform.rb +27 -0
  227. data/lib/webhookdb/plivo.rb +52 -0
  228. data/lib/webhookdb/postgres/maintenance.rb +166 -0
  229. data/lib/webhookdb/postgres/model.rb +82 -0
  230. data/lib/webhookdb/postgres/model_utilities.rb +382 -0
  231. data/lib/webhookdb/postgres/testing_pixie.rb +16 -0
  232. data/lib/webhookdb/postgres/validations.rb +46 -0
  233. data/lib/webhookdb/postgres.rb +176 -0
  234. data/lib/webhookdb/postmark.rb +20 -0
  235. data/lib/webhookdb/redis.rb +35 -0
  236. data/lib/webhookdb/replicator/atom_single_feed_v1.rb +116 -0
  237. data/lib/webhookdb/replicator/aws_pricing_v1.rb +488 -0
  238. data/lib/webhookdb/replicator/base.rb +1185 -0
  239. data/lib/webhookdb/replicator/column.rb +482 -0
  240. data/lib/webhookdb/replicator/convertkit_broadcast_v1.rb +69 -0
  241. data/lib/webhookdb/replicator/convertkit_subscriber_v1.rb +200 -0
  242. data/lib/webhookdb/replicator/convertkit_tag_v1.rb +66 -0
  243. data/lib/webhookdb/replicator/convertkit_v1_mixin.rb +65 -0
  244. data/lib/webhookdb/replicator/docgen.rb +167 -0
  245. data/lib/webhookdb/replicator/email_octopus_campaign_v1.rb +84 -0
  246. data/lib/webhookdb/replicator/email_octopus_contact_v1.rb +159 -0
  247. data/lib/webhookdb/replicator/email_octopus_event_v1.rb +244 -0
  248. data/lib/webhookdb/replicator/email_octopus_list_v1.rb +101 -0
  249. data/lib/webhookdb/replicator/fake.rb +453 -0
  250. data/lib/webhookdb/replicator/front_conversation_v1.rb +45 -0
  251. data/lib/webhookdb/replicator/front_marketplace_root_v1.rb +55 -0
  252. data/lib/webhookdb/replicator/front_message_v1.rb +45 -0
  253. data/lib/webhookdb/replicator/front_v1_mixin.rb +22 -0
  254. data/lib/webhookdb/replicator/github_issue_comment_v1.rb +58 -0
  255. data/lib/webhookdb/replicator/github_issue_v1.rb +83 -0
  256. data/lib/webhookdb/replicator/github_pull_v1.rb +84 -0
  257. data/lib/webhookdb/replicator/github_release_v1.rb +47 -0
  258. data/lib/webhookdb/replicator/github_repo_v1_mixin.rb +250 -0
  259. data/lib/webhookdb/replicator/github_repository_event_v1.rb +45 -0
  260. data/lib/webhookdb/replicator/icalendar_calendar_v1.rb +465 -0
  261. data/lib/webhookdb/replicator/icalendar_event_v1.rb +334 -0
  262. data/lib/webhookdb/replicator/increase_account_number_v1.rb +77 -0
  263. data/lib/webhookdb/replicator/increase_account_transfer_v1.rb +61 -0
  264. data/lib/webhookdb/replicator/increase_account_v1.rb +63 -0
  265. data/lib/webhookdb/replicator/increase_ach_transfer_v1.rb +78 -0
  266. data/lib/webhookdb/replicator/increase_check_transfer_v1.rb +64 -0
  267. data/lib/webhookdb/replicator/increase_limit_v1.rb +78 -0
  268. data/lib/webhookdb/replicator/increase_transaction_v1.rb +74 -0
  269. data/lib/webhookdb/replicator/increase_v1_mixin.rb +121 -0
  270. data/lib/webhookdb/replicator/increase_wire_transfer_v1.rb +61 -0
  271. data/lib/webhookdb/replicator/intercom_contact_v1.rb +36 -0
  272. data/lib/webhookdb/replicator/intercom_conversation_v1.rb +38 -0
  273. data/lib/webhookdb/replicator/intercom_marketplace_root_v1.rb +69 -0
  274. data/lib/webhookdb/replicator/intercom_v1_mixin.rb +105 -0
  275. data/lib/webhookdb/replicator/oauth_refresh_access_token_mixin.rb +65 -0
  276. data/lib/webhookdb/replicator/plivo_sms_inbound_v1.rb +102 -0
  277. data/lib/webhookdb/replicator/postmark_inbound_message_v1.rb +94 -0
  278. data/lib/webhookdb/replicator/postmark_outbound_message_event_v1.rb +107 -0
  279. data/lib/webhookdb/replicator/schema_modification.rb +42 -0
  280. data/lib/webhookdb/replicator/shopify_customer_v1.rb +58 -0
  281. data/lib/webhookdb/replicator/shopify_order_v1.rb +64 -0
  282. data/lib/webhookdb/replicator/shopify_v1_mixin.rb +161 -0
  283. data/lib/webhookdb/replicator/signalwire_message_v1.rb +169 -0
  284. data/lib/webhookdb/replicator/sponsy_customer_v1.rb +54 -0
  285. data/lib/webhookdb/replicator/sponsy_placement_v1.rb +34 -0
  286. data/lib/webhookdb/replicator/sponsy_publication_v1.rb +125 -0
  287. data/lib/webhookdb/replicator/sponsy_slot_v1.rb +41 -0
  288. data/lib/webhookdb/replicator/sponsy_status_v1.rb +35 -0
  289. data/lib/webhookdb/replicator/sponsy_v1_mixin.rb +165 -0
  290. data/lib/webhookdb/replicator/state_machine_step.rb +69 -0
  291. data/lib/webhookdb/replicator/stripe_charge_v1.rb +77 -0
  292. data/lib/webhookdb/replicator/stripe_coupon_v1.rb +62 -0
  293. data/lib/webhookdb/replicator/stripe_customer_v1.rb +60 -0
  294. data/lib/webhookdb/replicator/stripe_dispute_v1.rb +77 -0
  295. data/lib/webhookdb/replicator/stripe_invoice_item_v1.rb +82 -0
  296. data/lib/webhookdb/replicator/stripe_invoice_v1.rb +116 -0
  297. data/lib/webhookdb/replicator/stripe_payout_v1.rb +67 -0
  298. data/lib/webhookdb/replicator/stripe_price_v1.rb +60 -0
  299. data/lib/webhookdb/replicator/stripe_product_v1.rb +60 -0
  300. data/lib/webhookdb/replicator/stripe_refund_v1.rb +101 -0
  301. data/lib/webhookdb/replicator/stripe_subscription_item_v1.rb +56 -0
  302. data/lib/webhookdb/replicator/stripe_subscription_v1.rb +75 -0
  303. data/lib/webhookdb/replicator/stripe_v1_mixin.rb +116 -0
  304. data/lib/webhookdb/replicator/transistor_episode_stats_v1.rb +141 -0
  305. data/lib/webhookdb/replicator/transistor_episode_v1.rb +169 -0
  306. data/lib/webhookdb/replicator/transistor_show_v1.rb +68 -0
  307. data/lib/webhookdb/replicator/transistor_v1_mixin.rb +65 -0
  308. data/lib/webhookdb/replicator/twilio_sms_v1.rb +156 -0
  309. data/lib/webhookdb/replicator/webhook_request.rb +5 -0
  310. data/lib/webhookdb/replicator/webhookdb_customer_v1.rb +74 -0
  311. data/lib/webhookdb/replicator.rb +224 -0
  312. data/lib/webhookdb/role.rb +42 -0
  313. data/lib/webhookdb/sentry.rb +35 -0
  314. data/lib/webhookdb/service/auth.rb +138 -0
  315. data/lib/webhookdb/service/collection.rb +91 -0
  316. data/lib/webhookdb/service/entities.rb +97 -0
  317. data/lib/webhookdb/service/helpers.rb +270 -0
  318. data/lib/webhookdb/service/middleware.rb +124 -0
  319. data/lib/webhookdb/service/types.rb +30 -0
  320. data/lib/webhookdb/service/validators.rb +32 -0
  321. data/lib/webhookdb/service/view_api.rb +63 -0
  322. data/lib/webhookdb/service.rb +219 -0
  323. data/lib/webhookdb/service_integration.rb +332 -0
  324. data/lib/webhookdb/shopify.rb +35 -0
  325. data/lib/webhookdb/signalwire.rb +13 -0
  326. data/lib/webhookdb/slack.rb +68 -0
  327. data/lib/webhookdb/snowflake.rb +90 -0
  328. data/lib/webhookdb/spec_helpers/async.rb +122 -0
  329. data/lib/webhookdb/spec_helpers/citest.rb +88 -0
  330. data/lib/webhookdb/spec_helpers/integration.rb +121 -0
  331. data/lib/webhookdb/spec_helpers/message.rb +41 -0
  332. data/lib/webhookdb/spec_helpers/postgres.rb +220 -0
  333. data/lib/webhookdb/spec_helpers/service.rb +432 -0
  334. data/lib/webhookdb/spec_helpers/shared_examples_for_columns.rb +56 -0
  335. data/lib/webhookdb/spec_helpers/shared_examples_for_replicators.rb +915 -0
  336. data/lib/webhookdb/spec_helpers/whdb.rb +139 -0
  337. data/lib/webhookdb/spec_helpers.rb +63 -0
  338. data/lib/webhookdb/sponsy.rb +14 -0
  339. data/lib/webhookdb/stripe.rb +37 -0
  340. data/lib/webhookdb/subscription.rb +203 -0
  341. data/lib/webhookdb/sync_target.rb +491 -0
  342. data/lib/webhookdb/tasks/admin.rb +49 -0
  343. data/lib/webhookdb/tasks/annotate.rb +36 -0
  344. data/lib/webhookdb/tasks/db.rb +82 -0
  345. data/lib/webhookdb/tasks/docs.rb +42 -0
  346. data/lib/webhookdb/tasks/fixture.rb +35 -0
  347. data/lib/webhookdb/tasks/message.rb +50 -0
  348. data/lib/webhookdb/tasks/regress.rb +87 -0
  349. data/lib/webhookdb/tasks/release.rb +27 -0
  350. data/lib/webhookdb/tasks/sidekiq.rb +23 -0
  351. data/lib/webhookdb/tasks/specs.rb +64 -0
  352. data/lib/webhookdb/theranest.rb +15 -0
  353. data/lib/webhookdb/transistor.rb +13 -0
  354. data/lib/webhookdb/twilio.rb +13 -0
  355. data/lib/webhookdb/typed_struct.rb +44 -0
  356. data/lib/webhookdb/version.rb +5 -0
  357. data/lib/webhookdb/webhook_response.rb +50 -0
  358. data/lib/webhookdb/webhook_subscription/delivery.rb +82 -0
  359. data/lib/webhookdb/webhook_subscription.rb +226 -0
  360. data/lib/webhookdb/windows_tz.rb +32 -0
  361. data/lib/webhookdb/xml.rb +92 -0
  362. data/lib/webhookdb.rb +224 -0
  363. data/lib/webterm/apps.rb +45 -0
  364. metadata +1129 -0
@@ -0,0 +1,1185 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "appydays/loggable"
4
+ require "concurrent-ruby"
5
+
6
+ require "webhookdb/backfiller"
7
+ require "webhookdb/db_adapter"
8
+ require "webhookdb/connection_cache"
9
+ require "webhookdb/replicator/column"
10
+ require "webhookdb/replicator/schema_modification"
11
+ require "webhookdb/replicator/webhook_request"
12
+ require "webhookdb/typed_struct"
13
+
14
+ require "webhookdb/jobs/send_webhook"
15
+ require "webhookdb/jobs/sync_target_run_sync"
16
+
17
+ class Webhookdb::Replicator::Base
18
+ include Appydays::Loggable
19
+ include Webhookdb::DBAdapter::ColumnTypes
20
+
21
# Return the descriptor for this service.
# Every concrete replicator class must override this; the descriptor is
# what registers the replicator and describes its capabilities.
# @abstract
# @return [Webhookdb::Replicator::Descriptor]
def self.descriptor
  raise NotImplementedError, "#{self.class}: must return a descriptor that is used for registration purposes"
end
27
+
28
# The service integration this replicator instance operates on.
# @return [Webhookdb::ServiceIntegration]
attr_reader :service_integration

# @param service_integration [Webhookdb::ServiceIntegration]
def initialize(service_integration)
  @service_integration = service_integration
end
34
+
35
# Instance-level, memoized access to the class descriptor.
# @return [Webhookdb::Replicator::Descriptor]
def descriptor
  @descriptor ||= self.class.descriptor
end
39
+
40
# Singular name of the replicated resource, taken (and memoized) from the descriptor.
# @return [String]
def resource_name_singular
  @resource_name_singular ||= self.descriptor.resource_name_singular
end
43
+
44
# Plural name of the replicated resource, taken (and memoized) from the descriptor.
# @return [String]
def resource_name_plural
  @resource_name_plural ||= self.descriptor.resource_name_plural
end
47
+
48
# Return true if the service should process webhooks in the actual endpoint,
# rather than asynchronously through the job system.
# This should ONLY be used where we have important order-of-operations
# in webhook processing and/or need to return data to the webhook sender.
#
# NOTE: You MUST implement +synchronous_processing_response_body+ if this returns true.
#
# @return [Boolean]
def process_webhooks_synchronously? = false
59
+
60
# Call with the value that was inserted by synchronous processing.
# Takes the row values being upserted (result of upsert_webhook),
# and the arguments used to upsert it (arguments to upsert_webhook),
# and should return the body string to respond back with.
#
# @param [Hash] upserted
# @param [Webhookdb::Replicator::WebhookRequest] request
# @return [String]
def synchronous_processing_response_body(upserted:, request:)
  if Webhookdb::Replicator.always_process_synchronously
    # Test/dev escape hatch: a canned body is fine when forcing sync processing globally.
    return {message: "process synchronously"}.to_json
  end
  raise NotImplementedError, "must be implemented if process_webhooks_synchronously? is true"
end
72
+
73
# In some cases, services may send us sensitive headers we do not want to log.
# This should be very rare but some services are designed really badly and send auth info in the webhook.
# Override to remove or obfuscate entries in the passed header hash (mutate in place).
# The base implementation is a no-op.
def preprocess_headers_for_logging(headers)
  nil
end
77
+
78
# Return a tuple of (schema, table) based on the organization's replication schema,
# and the service integration's table name.
# Falls back to :public when the org or its replication_schema is absent.
#
# @return [Array<Symbol>]
def schema_and_table_symbols
  org = self.service_integration.organization
  schema_name = org&.replication_schema&.to_sym || :public
  table_name = self.service_integration.table_name.to_sym
  [schema_name, table_name]
end
87
+
88
# Return a Sequel identifier using +schema_and_table_symbols+,
# or +schema+ or +table+ as overrides if given.
#
# @return [Sequel::SQL::QualifiedIdentifier]
def qualified_table_sequel_identifier(schema: nil, table: nil)
  default_schema, default_table = self.schema_and_table_symbols
  Sequel[schema || default_schema][table || default_table]
end
96
+
97
# Return a DBAdapter table based on +schema_and_table_symbols+.
# @return [Webhookdb::DBAdapter::Table]
def dbadapter_table
  sch, tbl = self.schema_and_table_symbols
  Webhookdb::DBAdapter::Table.new(
    name: tbl,
    schema: Webhookdb::DBAdapter::Schema.new(name: sch),
  )
end
105
+
106
# +Time.at(t)+, but nil if t is nil.
# Use when we have 'nullable' integer timestamps.
# @return [Time, nil]
protected def tsat(t)
  t.nil? ? nil : Time.at(t)
end
113
+
114
# Given a Rack request, return the webhook response object.
# Usually this performs verification of the request based on the webhook secret
# configured on the service integration.
# Note that if +skip_webhook_verification+ is true on the service integration,
# this method always returns 201 without calling +_webhook_response+.
#
# @param [Rack::Request] request
# @return [Webhookdb::WebhookResponse]
def webhook_response(request)
  if self.service_integration.skip_webhook_verification
    return Webhookdb::WebhookResponse.ok(status: 201)
  end
  self._webhook_response(request)
end
126
+
127
# Return the response for the webhook.
# We must do this immediately in the endpoint itself,
# since verification may include info specific to the request content
# (like, it can be whitespace sensitive).
# @abstract
# @param [Rack::Request] request
# @return [Webhookdb::WebhookResponse]
def _webhook_response(request)
  raise NotImplementedError
end
137
+
138
# If we support webhooks, these fields correspond to the webhook state machine.
# Override them if some other fields are also needed for webhooks.
# @return [Array<String>]
def _webhook_state_change_fields
  ["webhook_secret"]
end
141
+
142
# If we support backfilling, these keys are used for them.
# Override if other fields are used instead.
# There cannot be overlap between these and the webhook state change fields.
# @return [Array<String>]
def _backfill_state_change_fields
  ["backfill_key", "backfill_secret", "api_url"]
end
146
+
147
# Set the new service integration field and
# return the newly calculated state machine.
#
# Subclasses can override this method and then super,
# to change the field or value.
#
# @param field [String] Like 'webhook_secret', 'backfill_key', etc.
# @param value [String] The value of the field.
# @param attr [String] Subclasses can pass in a custom field that does not correspond
#   to a service integration column. When doing that, they must pass in attr,
#   which is what will be set during the state change.
# @return [Webhookdb::Replicator::StateMachineStep]
def process_state_change(field, value, attr: nil)
  attr ||= field
  desc = self.descriptor
  machine_method =
    case field
      when *self._webhook_state_change_fields
        # If we don't support webhooks, then the backfill state machine may be using it.
        desc.supports_webhooks? ? :calculate_webhook_state_machine : :calculate_backfill_state_machine
      when *self._backfill_state_change_fields
        # If we don't support backfilling, then the create state machine may be using them.
        desc.supports_backfill? ? :calculate_backfill_state_machine : :calculate_webhook_state_machine
      when "dependency_choice"
        # Choose an upstream dependency for an integration.
        # See where this is used for more details.
        value = self._find_dependency_candidate(value)
        attr = "depends_on"
        self.preferred_create_state_machine_method
      when "noop_create"
        # Use this to just recalculate the state machine,
        # not make any changes to the data.
        return self.calculate_preferred_create_state_machine
      else
        raise ArgumentError, "Field '#{field}' is not valid for a state change"
    end
  self.service_integration.db.transaction do
    self.service_integration.send(:"#{attr}=", value)
    self.service_integration.save_changes
    step = self.send(machine_method)
    if step.successful? && machine_method == :calculate_backfill_state_machine
      # If we are processing the backfill state machine, and we finish successfully,
      # we always want to start syncing.
      self._enqueue_backfill_jobs(incremental: true)
    end
    return step
  end
end
194
+
195
# If the integration supports webhooks, then we want to do that on create.
# If it's backfill only, then we fall back to that instead.
# Things like choosing dependencies are webhook-vs-backfill agnostic,
# so which machine we choose isn't that important (but it does happen during creation).
# @return [Symbol]
def preferred_create_state_machine_method
  if self.descriptor.supports_webhooks?
    :calculate_webhook_state_machine
  else
    :calculate_backfill_state_machine
  end
end
203
+
204
# See +preferred_create_state_machine_method+.
# If we prefer backfilling, and it's successful, we also want to enqueue jobs;
# that is, use +calculate_and_backfill_state_machine+, not just +calculate_backfill_state_machine+.
# @return [Webhookdb::Replicator::StateMachineStep]
def calculate_preferred_create_state_machine
  if self.preferred_create_state_machine_method == :calculate_backfill_state_machine
    return self.calculate_and_backfill_state_machine(incremental: true)[0]
  end
  self.calculate_webhook_state_machine
end
213
+
214
# Create a BackfillJob for this integration and optionally enqueue it.
# @param incremental [Boolean] Whether the backfill is incremental.
# @param criteria [Hash, nil] Optional criteria for the job; defaults to {}.
# @param recursive [Boolean] When true, also create jobs for dependent integrations.
# @param enqueue [Boolean] When true, enqueue the job after creating it.
# @return [Webhookdb::BackfillJob]
def _enqueue_backfill_jobs(incremental:, criteria: nil, recursive: true, enqueue: true)
  creator = recursive ? :create_recursive : :create
  job = Webhookdb::BackfillJob.send(
    creator,
    service_integration:,
    incremental:,
    criteria: criteria || {},
    created_by: Webhookdb.request_user_and_admin[0],
  )
  job.enqueue if enqueue
  job
end
226
+
227
# Resolve a user-entered, 1-based choice into one of the integration's
# dependency candidates. Blank input defaults to the first candidate.
# @param value [String]
# @raise [Webhookdb::InvalidPrecondition] if there are no candidates at all.
# @raise [Webhookdb::InvalidInput] if the choice is out of range.
def _find_dependency_candidate(value)
  chosen = value.strip.blank? ? 1 : value.to_i
  index = chosen - 1
  candidates = self.service_integration.dependency_candidates
  raise Webhookdb::InvalidPrecondition, "no dependency candidates" if candidates.empty?
  if index.negative? || index >= candidates.length
    raise Webhookdb::InvalidInput, "'#{value}' is not a valid dependency"
  end
  candidates[index]
end
237
+
238
# Return the state machine that is used when setting up this integration.
# Usually this entails providing the user the webhook url,
# and providing or asking for a webhook secret. In some cases,
# this can be a lot more complex though.
#
# @abstract
# @return [Webhookdb::Replicator::StateMachineStep]
def calculate_webhook_state_machine
  raise NotImplementedError
end
248
+
249
# Return the state machine that is used when adding backfill support to an integration.
# Usually this sets one or both of the backfill key and secret.
# This is a pure function that can be tested on its own;
# the endpoints just need to return a state machine step.
#
# @abstract
# @return [Webhookdb::Replicator::StateMachineStep]
def calculate_backfill_state_machine
  raise NotImplementedError
end
257
+
258
# Run +calculate_backfill_state_machine+; if the resulting step is successful,
# create and (optionally) enqueue a new BackfillJob.
# Returns a tuple of the StateMachineStep and BackfillJob.
# The job is nil when the step was not successful.
# @return [Array<Webhookdb::StateMachineStep, Webhookdb::BackfillJob>]
def calculate_and_backfill_state_machine(incremental:, criteria: nil, recursive: true, enqueue: true)
  step = self.calculate_backfill_state_machine
  job = nil
  job = self._enqueue_backfill_jobs(incremental:, criteria:, recursive:, enqueue:) if step.successful?
  return step, job
end
270
+
271
# Message used when backfilling is not supported.
# Override for a custom explanation, or provide descriptor#documentation_url,
# which is used in a default message; with no documentation available,
# a fallback message is returned instead.
def backfill_not_supported_message
  du = self.documentation_url
  if du.blank?
    return %(Sorry, you cannot backfill this integration. You may be looking for one of the following:

  webhookdb integrations reset #{self.service_integration.table_name}
  )
  end
  return %(Sorry, you cannot manually backfill this integration.
Please refer to the documentation at #{du}
for information on how to refresh data.)
end
290
+
291
# Remove all the information used in the initial creation of the integration
# so that it can be re-entered.
def clear_webhook_information
  self._clear_webook_information
  # When an integration does not support both webhooks and backfilling,
  # it is safe to clear ALL fields and get back to an initial state.
  self._clear_backfill_information unless self.descriptor.supports_webhooks_and_backfill?
  self.service_integration.save_changes
end
299
+
300
# Blank out the webhook secret on the service integration.
# NOTE(review): the 'webook' typo in the name looks historical; callers use it, so it is preserved.
def _clear_webook_information
  self.service_integration.set(webhook_secret: "")
end
303
+
304
# Remove all the information needed for backfilling from the integration
# so that it can be re-entered.
def clear_backfill_information
  self._clear_backfill_information
  # When an integration does not support both webhooks and backfilling,
  # it is safe to clear ALL fields and get back to an initial state.
  self._clear_webook_information unless self.descriptor.supports_webhooks_and_backfill?
  self.service_integration.save_changes
end
312
+
313
# Blank out the API url and backfill credentials on the service integration.
def _clear_backfill_information
  self.service_integration.set(api_url: "", backfill_key: "", backfill_secret: "")
end
316
+
317
# Find a dependent service integration with the given service name.
# Return nil when none are found; raise when multiple are found,
# as this should only be used for automatically managed integrations.
# @return [Webhookdb::ServiceIntegration,nil]
def find_dependent(service_name)
  matches = self.service_integration.dependents.filter { |si| si.service_name == service_name }
  if matches.length > 1
    raise Webhookdb::InvalidPrecondition, "there are multiple #{service_name} integrations in dependents"
  end
  return matches.first
end
327
+
328
# Like +find_dependent+, but raise when no match exists.
# @return [Webhookdb::ServiceIntegration]
def find_dependent!(service_name)
  found = self.find_dependent(service_name)
  raise Webhookdb::InvalidPrecondition, "there is no #{service_name} integration in dependents" if found.nil?
  return found
end
334
+
335
+ # Use this to determine whether we should add an enrichment column in
336
+ # the create table modification to store the enrichment body.
337
+ def _store_enrichment_body?
338
+ return false
339
+ end
340
+
341
# Create the replication table using the admin connection.
# @param if_not_exists [Boolean] use CREATE TABLE IF NOT EXISTS when true.
def create_table(if_not_exists: false)
  modification = self.create_table_modification(if_not_exists:)
  self.admin_dataset(timeout: :fast) do |ds|
    modification.execute(ds.db)
  end
end
347
+
348
# Return the schema modification used to create the table where it does not exist.
# @return [Webhookdb::Replicator::SchemaModification]
def create_table_modification(if_not_exists: false)
  table = self.dbadapter_table
  # The 'data' column goes last: it is very large and we want the
  # other columns to show up first in psql/pgcli.
  columns = [self.primary_key_column, self.remote_key_column]
  columns.concat(self.storable_columns)
  columns << self.data_column
  adapter = Webhookdb::DBAdapter::PG.new
  mod = Webhookdb::Replicator::SchemaModification.new
  mod.transaction_statements << adapter.create_table_sql(table, columns, if_not_exists:)
  self.indices(table).each do |dbindex|
    mod.transaction_statements << adapter.create_index_sql(dbindex, concurrently: false)
  end
  mod.application_database_statements << self.service_integration.ensure_sequence_sql if self.requires_sequence?
  return mod
end
365
+
366
# We need to give indices a persistent name, unique across the schema,
# since multiple indices within a schema cannot share a name.
#
# Certain RDBMS (Postgres) silently truncate index names over a maximum
# length; a truncated name colliding with another index can prevent an
# index being created under 'CREATE INDEX IF NOT EXISTS.'
# To avoid this, when the generated name is too long, a (truncated)
# md5 hash of the column names is used in place of the column names.
#
# @param columns [Array<Webhookdb::DBAdapter::Column, Webhookdb::Replicator::Column>] Must respond to :name.
# @return [String]
protected def index_name(columns)
  raise Webhookdb::InvalidPrecondition, "sint needs an opaque id" if self.service_integration.opaque_id.blank?
  joined = columns.map(&:name).join("_")
  opaque_id = self.service_integration.opaque_id
  # Handle old IDs without the leading 'svi_'.
  opaque_id = "idx#{opaque_id}" if /\d/.match?(opaque_id[0])
  name = "#{opaque_id}_#{joined}_idx"
  if name.size > MAX_INDEX_NAME_LENGTH
    # There is no room for the full 32-char md5; base64 etc. would depend on
    # case sensitivity, so just lop a few characters (normally 2) off the md5.
    # The collision space is so small (some combination of column names would
    # need the same md5) that it is not a practical concern.
    digest = Digest::MD5.hexdigest(joined)
    room = MAX_INDEX_NAME_LENGTH - "#{opaque_id}__idx".size
    name = "#{opaque_id}_#{digest[...room]}_idx"
  end
  raise Webhookdb::InvariantViolation, "index names cannot exceed 63 chars, got #{name.size} in '#{name}'" if
    name.size > 63
  return name
end
398
+
399
# Postgres truncates identifiers longer than this; see +index_name+.
MAX_INDEX_NAME_LENGTH = 63
400
+
401
# The synthetic bigint primary key column for the replication table.
# @return [Webhookdb::DBAdapter::Column]
def primary_key_column
  return Webhookdb::DBAdapter::Column.new(name: :pk, type: BIGINT, pk: true)
end
405
+
406
# The unique, non-nullable column holding the resource's remote identifier.
# @return [Webhookdb::DBAdapter::Column]
def remote_key_column
  return self._remote_key_column.to_dbadapter(unique: true, nullable: false)
end
410
+
411
# The non-nullable column storing the raw resource payload.
# @return [Webhookdb::DBAdapter::Column]
def data_column
  return Webhookdb::DBAdapter::Column.new(name: :data, type: OBJECT, nullable: false)
end
415
+
416
# Column used to store enrichments, or nil when the service does not use them
# (see +_store_enrichment_body?+).
# @return [Webhookdb::DBAdapter::Column,nil]
def enrichment_column
  return nil unless self._store_enrichment_body?
  return Webhookdb::DBAdapter::Column.new(name: :enrichment, type: OBJECT, nullable: true)
end
422
+
423
# The replicator's denormalized columns, converted to DB adapter columns.
# @return [Array<Webhookdb::DBAdapter::Column>]
def denormalized_columns
  return self._denormalized_columns.map(&:to_dbadapter)
end
427
+
428
# Specs for multi-column indices.
# Every named column must be in +denormalized_columns+.
# @return [Array<Webhook::Replicator::IndexSpec>]
def _extra_index_specs
  return []
end
434
+
435
# Denormalized columns, plus the enrichment column when supported.
# Does not include the data or external id columns, though perhaps it should.
# @return [Array<Webhookdb::DBAdapter::Column>]
def storable_columns
  cols = self.denormalized_columns
  enrichment = self.enrichment_column
  cols << enrichment unless enrichment.nil?
  return cols
end
445
+
446
# Column to use as the 'timestamp' for the row, usually a created/updated-at.
# @return [Webhookdb::DBAdapter::Column]
# @raise [NotImplementedError] when no denormalized column matches +_timestamp_column_name+.
def timestamp_column
  found = self._denormalized_columns.find { |c| c.name == self._timestamp_column_name }
  if found.nil?
    raise NotImplementedError, "#{self.descriptor.name} has no timestamp column #{self._timestamp_column_name}"
  end
  return found.to_dbadapter
end
455
+
456
# The name of the timestamp column in the schema. Used primarily for
# conditional upserts (knowing whether a row changed), and also as a
# general way of auditing changes.
# @abstract
# @return [Symbol]
def _timestamp_column_name
  raise NotImplementedError
end
463
+
464
# Each integration needs a single remote key, like the Shopify order id for
# Shopify orders, or sid for Twilio resources. This column must be unique
# for the table, like a primary key.
#
# @abstract
# @return [Webhookdb::Replicator::Column]
def _remote_key_column
  raise NotImplementedError
end
472
+
473
# When an integration needs denormalized columns, specify them here.
# Indices are created for each column; modifiers can be used if columns
# need a default or similar. See +Webhookdb::Replicator::Column+ for details.
#
# @return [Array<Webhookdb::Replicator::Column]
def _denormalized_columns
  return []
end
482
+
483
# Build the set of indices for the table: one per indexed column, plus any
# multi-column indices described by +_extra_index_specs+.
# @return [Array<Webhookdb::DBAdapter::Index>]
def indices(table)
  all_cols = [self.primary_key_column, self.remote_key_column]
  all_cols.concat(self.storable_columns)
  cols_by_name = all_cols.index_by(&:name)

  out = []
  all_cols.select(&:index?).each do |col|
    targets = [col]
    nm = self.index_name(targets)
    out << Webhookdb::DBAdapter::Index.new(name: nm.to_sym, table:, targets:, where: col.index_where)
  end
  self._extra_index_specs.each do |spec|
    targets = spec.columns.map { |colname| cols_by_name.fetch(colname) }
    nm = self.index_name(targets)
    out << Webhookdb::DBAdapter::Index.new(name: nm.to_sym, table:, targets:, where: spec.where)
  end
  return out
end
502
+
503
# We support adding columns to existing integrations without bumping the version;
# changing types, or removing/renaming columns, is not supported and should bump
# the version or be handled out-of-band (like deleting the integration then backfilling).
# Computes the needed modification (see +ensure_all_columns_modification+),
# applies it when it is not a no-op, and clears cached dataset columns.
def ensure_all_columns
  modification = self.ensure_all_columns_modification
  return if modification.noop?
  self.admin_dataset(timeout: :slow_schema) do |ds|
    modification.execute(ds.db)
    # Clear cached columns on the dataset since we know we're adding more.
    # Probably not a huge deal, but may as well keep it in sync.
    ds.send(:clear_columns_cache)
  end
  self.readonly_dataset { |ds| ds.send(:clear_columns_cache) }
end
519
+
520
# Compute the schema modification that brings the physical table up to date
# with the currently defined columns and indices: add missing denormalized
# and enrichment columns, backfill their values in chunks, then add missing
# indices. Returns the full create-table modification when the table does
# not exist yet.
# @return [Webhookdb::Replicator::SchemaModification]
def ensure_all_columns_modification
  existing_cols, existing_indices = nil
  max_pk = 0
  sint = self.service_integration
  self.admin_dataset do |ds|
    return self.create_table_modification unless ds.db.table_exists?(self.qualified_table_sequel_identifier)
    existing_cols = ds.columns.to_set
    existing_indices = ds.db[:pg_indexes].where(
      schemaname: sint.organization.replication_schema,
      tablename: sint.table_name,
    ).select_map(:indexname).to_set
    max_pk = ds.max(:pk) || 0
  end
  adapter = Webhookdb::DBAdapter::PG.new
  table = self.dbadapter_table
  result = Webhookdb::Replicator::SchemaModification.new

  # Use reject, not delete_if: delete_if mutates the array returned by
  # _denormalized_columns in place, which corrupts the column list if the
  # replicator memoizes it.
  missing_columns = self._denormalized_columns.reject { |c| existing_cols.include?(c.name) }
  # Add missing columns.
  missing_columns.each do |whcol|
    # Don't bother bulking the ADDs into a single ALTER TABLE, it won't really matter.
    result.transaction_statements << adapter.add_column_sql(table, whcol.to_dbadapter)
  end
  # Easier to handle this explicitly than use storable_columns, but it is a
  # duplicated concept so be careful.
  if (enrich_col = self.enrichment_column) && !existing_cols.include?(enrich_col.name)
    result.transaction_statements << adapter.add_column_sql(table, enrich_col)
  end

  # Backfill values for new columns.
  if missing_columns.any?
    # We need to backfill values into the new column, but we don't want to lock the entire table
    # as we update each row. So we need to update in chunks of rows.
    # Chunk size should be large for speed (and sending over fewer queries), but small enough
    # to induce a viable delay if another query is updating the same row.
    # Note that the delay will only be for writes to those rows; reads will not block,
    # so something a bit longer should be ok.
    #
    # Note that at the point these UPDATEs are running, we have the new column AND the new code inserting
    # into that new column. We could in theory skip all the PKs that were added after this modification
    # started to run. However considering the number of rows in this window will always be relatively low
    # (though not absolutely low), and the SQL backfill operation should yield the same result
    # as the Ruby operation, this doesn't seem too important.
    result.nontransaction_statements.concat(missing_columns.filter_map(&:backfill_statement))
    update_expr = missing_columns.to_h { |c| [c.name, c.backfill_expr || c.to_sql_expr] }
    self.admin_dataset do |ds|
      chunks = Webhookdb::Replicator::Base.chunked_row_update_bounds(max_pk)
      chunks[...-1].each do |(lower, upper)|
        update_query = ds.where { pk > lower }.where { pk <= upper }.update_sql(update_expr)
        result.nontransaction_statements << update_query
      end
      final_update_query = ds.where { pk > chunks[-1][0] }.update_sql(update_expr)
      result.nontransaction_statements << final_update_query
    end
  end

  # Add missing indices. This should happen AFTER the UPDATE calls so the UPDATEs don't have to update indices.
  # (each, not map: the return value is not used.)
  self.indices(table).each do |index|
    next if existing_indices.include?(index.name.to_s)
    result.nontransaction_statements << adapter.create_index_sql(index, concurrently: true)
  end

  result.application_database_statements << sint.ensure_sequence_sql if self.requires_sequence?
  return result
end
585
+
586
# Return an array of pk bounds used for splitting UPDATE queries so locks are
# not held on the entire table when backfilling values for newly added
# columns. See +ensure_all_columns_modification+.
#
# The returned chunks are like: [[0, 100], [100, 200], [200]],
# and meant to be used like `0 < pk <= 100`, `100 < pk <= 200`, `pk > 200`.
#
# Note the final value in the array is a single item, used like `pk > chunks[-1][0]`.
#
# @param max_pk [Integer] the current maximum pk in the table (0 when empty).
# @param chunk_size [Integer] rows per chunk.
# @return [Array<Array<Integer>>]
def self.chunked_row_update_bounds(max_pk, chunk_size: 1_000_000)
  result = []
  chunk_lower_pk = 0
  chunk_upper_pk = chunk_size
  # Get chunks like 0 < pk <= 100, 100 < pk <= 200, etc., while the chunk's
  # upper bound does not pass max_pk. Ie with a chunk size of 100 and a
  # max_pk of 450, the bounded chunks end at [300, 400].
  while chunk_upper_pk <= max_pk
    result << [chunk_lower_pk, chunk_upper_pk]
    chunk_lower_pk += chunk_size
    chunk_upper_pk += chunk_size
  end
  # Finally, one open-ended chunk for all remaining rows: in the example above,
  # 'pk > 400' covers rows up to max_pk 450, plus any rows written after
  # max_pk was read.
  result << [chunk_lower_pk]
end
610
+
611
# Some integrations require sequences, like when upserting rows with numerical
# unique ids (random values like UUIDs could be generated without one).
# Such integrations can mark themselves as requiring a sequence.
#
# The sequence is created in the *application database* but used primarily
# when inserting rows into the *organization/replication database*; sequences
# are not possible to migrate when moving replication databases.
def requires_sequence?
  return false
end
622
+
623
# A given HTTP request may not be handled by the service integration it was
# sent to, for example when that integration is part of some 'root' hierarchy.
# Called in the webhook endpoint; must return the replicator that handles the
# webhook request (which validates the request, and gets it enqueued).
#
# By default, the service called by the webhook is the one we want to use,
# so return self.
#
# @param request [Rack::Request]
# @return [Webhookdb::Replicator::Base]
def dispatch_request_to(request)
  return self
end
637
+
638
# Upsert a webhook using only a body.
# Not valid for the rare integration that does depend on request info,
# like when different action is needed based on the request method.
#
# @param body [Hash]
def upsert_webhook_body(body, **kw)
  req = Webhookdb::Replicator::WebhookRequest.new(body:)
  return self.upsert_webhook(req, **kw)
end
646
+
647
# Upsert a webhook request into the database; logs and re-raises any error.
# Note this takes a WebhookRequest, NOT a Rack::Request.
#
# @param [Webhookdb::Replicator::WebhookRequest] request
def upsert_webhook(request, **kw)
  return self._upsert_webhook(request, **kw)
rescue StandardError => e
  self.logger.error("upsert_webhook_error", request: request.as_json, error: e)
  raise
end
657
+
658
# Hook to be overridden, while still retaining top-level +upsert_webhook+
# functionality like error handling.
#
# @param request [Webhookdb::Replicator::WebhookRequest]
# @param upsert [Boolean] If false, just return what would be upserted.
def _upsert_webhook(request, upsert: true)
  resource, event = self._resource_and_event(request)
  return nil if resource.nil?
  enrichment = self._fetch_enrichment(resource, event, request)
  prepared = self._prepare_for_insert(resource, event, request, enrichment)
  # :data is set below; _prepare_for_insert must never provide it.
  raise Webhookdb::InvalidPostcondition if prepared.key?(:data)
  inserting = {}
  data_col_val = self._resource_to_data(resource, event, request, enrichment)
  inserting[:data] = self._to_json(data_col_val)
  inserting[:enrichment] = self._to_json(enrichment) if self._store_enrichment_body?
  inserting.merge!(prepared)
  return inserting unless upsert
  remote_key_col = self._remote_key_column
  updating = self._upsert_update_expr(inserting, enrichment:)
  update_where = self._update_where_expr
  upserted_rows = self.admin_dataset(timeout: :fast) do |ds|
    ds.insert_conflict(
      target: remote_key_col.name,
      update: updating,
      update_where:,
    ).insert(inserting)
  end
  row_changed = upserted_rows.present?
  self._notify_dependents(inserting, row_changed)
  self._publish_rowupsert(inserting) if row_changed
  return inserting
end
690
+
691
# Encode +v+ to JSON, stripping encoded NULL characters, which are invalid in
# PG JSONB: JSONB does not store the encoded string verbatim, it parses it
# into PG types, and a PG string cannot contain NUL since C strings are
# NUL-terminated. So for {x: "\u0000"}, #to_json gives '{"x":"\\u0000"}',
# and removing the encoded NULL gives '{"x":""}'.
#
# HOWEVER, if the encoded null is itself escaped, it MUST NOT be removed.
# For {x: "\u0000".to_json}.to_json (a JSON string inside a JSON string),
# we get '{"x":"\\\\u0000"}' — an *escaped* backslash followed by "u0000",
# which is valid for PG. Removing "\\u0000" there would leave '{"x":"\\"}',
# an invalid document.
#
# So only unescaped "\\u0000" is removed, by mapping occurrences preceded by
# a backslash back to themselves and bare occurrences to the empty string.
def _to_json(v)
  return v.to_json.gsub(/(\\\\u0000|\\u0000)/, {"\\\\u0000" => "\\\\u0000", "\\u0000" => ""})
end
710
+
711
# Tell each dependent integration's replicator about the upsert.
# @param inserting [Hash] the row sent to the database.
# @param changed [Boolean] whether the upsert actually modified a row.
def _notify_dependents(inserting, changed)
  self.service_integration.dependents.each do |dep|
    dep.replicator.on_dependency_webhook_upsert(self, inserting, changed:)
  end
end
717
+
718
# True when any webhook subscription on this integration needs notifying.
def _any_subscriptions_to_notify?
  return !self.service_integration.all_webhook_subscriptions_dataset.to_notify.empty?
end
721
+
722
# Publish a rowupsert event so webhook subscriptions get notified.
# @param row [Hash] the upserted row.
# @param check_for_subscriptions [Boolean] skip the subscription check when false.
def _publish_rowupsert(row, check_for_subscriptions: true)
  return unless check_for_subscriptions && self._any_subscriptions_to_notify?
  rkey = self._remote_key_column.name
  payload = [
    self.service_integration.id,
    {
      row:,
      external_id_column: rkey,
      external_id: row[rkey],
    },
  ]
  # We AVOID pubsub here because we do NOT want to go through the router
  # and audit logger for this.
  event = Amigo::Event.create("webhookdb.serviceintegration.rowupsert", payload.as_json)
  Webhookdb::Jobs::SendWebhook.perform_async(event.as_json)
end
737
+
738
# True when the integration requires making an API call to upsert.
# This puts the sync into a lower-priority queue so it is less likely to
# block other processing. Usually true when enrichments are involved.
# @return [Boolean]
def upsert_has_deps?
  return false
end
746
+
747
# Given the resource that is going to be inserted and an optional event,
# make an API call to enrich it with further data if needed.
# The result is passed to +_prepare_for_insert+.
#
# @param [Hash,nil] resource
# @param [Hash,nil] event
# @param [Webhookdb::Replicator::WebhookRequest] request
# @return [*]
def _fetch_enrichment(resource, event, request)
  return nil
end
758
+
759
# The argument for insert_conflict's update_where clause.
# Used to conditionally update, like updating only if a row is newer than
# what's stored. We must always have an 'update where' because we never want
# to overwrite with the same data as exists.
#
# @example With a meaningful timestamp
#   self.qualified_table_sequel_identifier[:updated_at] < Sequel[:excluded][:updated_at]
#
# An integration with no way to detect if a resource changed
# can compare data columns instead.
#
# @example Without a meaningful timestamp
#   self.qualified_table_sequel_identifier[:data] !~ Sequel[:excluded][:data]
#
# @abstract
# @return [Sequel::SQL::Expression]
def _update_where_expr
  raise NotImplementedError
end
778
+
779
# Given a webhook/backfill item payload, return the resource hash and an
# optional event hash. When 'body' is the resource itself, return [body, nil];
# when 'body' is an event, return [body.resource-key, body].
# Columns can check for the presence of an event and/or body when converting.
#
# If this returns nil, the upsert is skipped.
#
# For example, a Stripe customer backfill upsert would be `{id: 'cus_123'}`
# when we backfill, but `{type: 'event', data: {id: 'cus_123'}}` when handling an event.
#
# @abstract
# @param [Webhookdb::Replicator::WebhookRequest] request
# @return [Array<Hash>,nil]
def _resource_and_event(request)
  raise NotImplementedError
end
799
+
800
# Return the hash that should be inserted into the database,
# based on the denormalized columns and data given.
# Columns whose computed value is nil and that are marked skip_nil? are omitted.
# @param [Hash,nil] resource
# @param [Hash,nil] event
# @param [Webhookdb::Replicator::WebhookRequest] request
# @param [Hash,nil] enrichment
# @return [Hash]
def _prepare_for_insert(resource, event, request, enrichment)
  result = {}
  [self._remote_key_column].concat(self._denormalized_columns).each do |col|
    value = col.to_ruby_value(resource:, event:, enrichment:, service_integration:)
    result[col.name] = value unless value.nil? && col.skip_nil?
  end
  return result
end
815
+
816
# Given the resource, return the value for the :data column.
# Only needed in rare situations where fields should be stored on the row,
# but not in :data. To skip :data column updates, return nil.
# @param [Hash,nil] resource
# @param [Hash,nil] event
# @param [Webhookdb::Replicator::WebhookRequest] request
# @param [Hash,nil] enrichment
# @return [Hash]
def _resource_to_data(resource, event, request, enrichment)
  return resource
end
828
+
829
# Given the hash passed to the Sequel insert (all columns, including those
# from +_prepare_for_insert+), return the hash used for the
# insert_conflict(update:) keyword args.
#
# Rather than sending the literal values again in the update statement
# (which is verbose, like the large 'data' column), this builds a smaller
# statement via 'EXCLUDED': by default every column updates to its inserted
# value, like `email = EXCLUDED.email` (the 'EXCLUDED' row being the one
# that failed to insert).
#
# Override when the service needs different values for inserting vs.
# updating, such as when a column's update value must use the EXCLUDED table
# in the upsert expression. Most commonly, you want to give a row a value
# ONLY on insert, OR on update ONLY if the column is nil — in that case pass
# this base result to +_coalesce_excluded_on_update+ (see it for details).
#
# You can also merge :data columns together, for example:
#   super_result[:data] = Sequel.lit("#{self.service_integration.table_name}.data || excluded.data")
def _upsert_update_expr(inserting, enrichment: nil)
  return inserting.each_with_object({}) { |(col, _), acc| acc[col] = Sequel[:excluded][col] }
end
855
+
856
# The string 'null' in a json column still represents 'null', but we'd rather
# have an actual NULL, represented by 'nil'. So return nil for nil
# (producing NULL), otherwise the JSON encoding of the argument.
protected def _nil_or_json(x)
  return nil if x.nil?
  return x.to_json
end
862
+
863
# Have a column set itself only on insert, or on update only if nil.
#
# Given the payload to DO UPDATE, mutate it so the columns named in
# 'column_names' keep what is already in the table, falling back to what is
# being inserted. Pass the new payload to the `update` kwarg of `insert_conflict`:
#
#   ds.insert_conflict(update: self._coalesce_excluded_on_update(payload, :created_at)).insert(payload)
#
# @param update [Hash]
# @param column_names [Array<Symbol>]
def _coalesce_excluded_on_update(update, column_names)
  # Replace just the specific columns being overridden.
  column_names.each do |col|
    update[col] = Sequel.function(:coalesce, self.qualified_table_sequel_identifier[col], Sequel[:excluded][col])
  end
end
880
+
881
# Yield a dataset over this replicator's table, using the admin connection.
# @return [Sequel::Dataset]
def admin_dataset(**kw, &)
  self.with_dataset(self.service_integration.organization.admin_connection_url_raw, **kw, &)
end
886
+
887
# Yield a dataset over this replicator's table, using the readonly connection.
# @return [Sequel::Dataset]
def readonly_dataset(**kw, &)
  self.with_dataset(self.service_integration.organization.readonly_connection_url_raw, **kw, &)
end
892
+
893
# Borrow a connection to +url+ from the connection cache and yield a dataset
# over this replicator's qualified table.
# @raise [LocalJumpError] when no block is given.
protected def with_dataset(url, **kw, &block)
  raise LocalJumpError if block.nil?
  Webhookdb::ConnectionCache.borrow(url, **kw) do |conn|
    yield(conn[self.qualified_table_sequel_identifier])
  end
end
899
+
900
# Run the given block with a (try) advisory lock taken on a combination of:
#
# - The table OID for this replicator
# - The given key
#
# Note that this establishes a new DB connection for the advisory lock;
# we have had issues with advisory locks on reused connections,
# and this is safer than having a lock that is never released.
protected def with_advisory_lock(key, &)
  url = self.service_integration.organization.admin_connection_url_raw
  result = nil
  Webhookdb::Dbutil.borrow_conn(url) do |conn|
    table_oid = conn.select(
      Sequel.function(:to_regclass, self.schema_and_table_symbols.join(".")).cast(:oid).as(:table_id),
    ).first[:table_id]
    self.logger.debug("taking_replicator_advisory_lock", table_oid:, key_id: key)
    Sequel::AdvisoryLock.new(conn, table_oid, key).with_lock? do
      result = yield
    end
  end
  return result
end
922
+
923
# Some replicators support 'instant sync', because they are upserted en-masse
# rather than row-by-row (sync targets normally run on a cron, since we cannot
# run them for every row). When inserting is always done through backfilling,
# we know we have a useful set of results to sync, so don't wait for cron.
def enqueue_sync_targets
  self.service_integration.sync_targets.each do |target|
    Webhookdb::Jobs::SyncTargetRunSync.perform_async(target.id)
  end
end
933
+
934
# Result of +verify_backfill_credentials+: whether the credentials were
# verified, plus a user-facing message when they were not.
class CredentialVerificationResult < Webhookdb::TypedStruct
  attr_reader :verified, :message
end
937
+
938
# Try to verify backfill credentials by fetching the first page of items.
# Only relevant for integrations supporting backfilling.
#
# If an error is received, use `_verify_backfill_<http status>_err_msg`
# as the error message when such a method is defined — so a 401 calls
# +_verify_backfill_401_err_msg+ if defined. Otherwise, call and return
# +_verify_backfill_err_msg+.
#
# @return [Webhookdb::CredentialVerificationResult]
def verify_backfill_credentials
  backfiller = self._backfillers.first
  if backfiller.nil?
    # Without a backfiller we can't verify credentials. This should never
    # happen in practice, because we wouldn't call this method if the
    # integration doesn't support it.
    raise "No backfiller available for #{self.service_integration.inspect}"
  end
  begin
    # Attempt the fetch, but do not return the backfill result.
    backfiller.fetch_backfill_page(nil, last_backfilled: nil)
  rescue Webhookdb::Http::Error => e
    status_method = :"_verify_backfill_#{e.status}_err_msg"
    msg = if self.respond_to?(status_method)
      self.send(status_method)
    else
      self._verify_backfill_err_msg
    end
    return CredentialVerificationResult.new(verified: false, message: msg)
  rescue TypeError, NoMethodError => e
    # Without an HTTP error, a TypeError/NoMethodError comes from differences
    # in the shapes of anticipated response data inside `fetch_backfill_page`,
    # so we can assume the credentials are okay.
    self.logger.info "verify_backfill_credentials_expected_failure", error: e
    return CredentialVerificationResult.new(verified: true, message: "")
  end
  return CredentialVerificationResult.new(verified: true, message: "")
end
973
+
974
+ def _verify_backfill_err_msg
975
+ raise NotImplementedError, "each integration must provide an error message for unanticipated errors"
976
+ end
977
+
978
+ def documentation_url = nil
979
+
980
  # In order to backfill, we need to:
  # - Iterate through pages of records from the external service
  # - Upsert each record
  # The caveats/complexities are:
  # - The backfill method should take care of retrying fetches for failed pages.
  # - That means it needs to keep track of some pagination token.
  #
  # @param job [Webhookdb::BackfillJob]
  # @raise [Webhookdb::InvalidPrecondition] if the job belongs to a different service integration.
  # @raise [Webhookdb::InvariantViolation] if this descriptor does not support backfill.
  # @raise [Webhookdb::Replicator::CredentialsMissing] if backfill key, secret, and dependency are all blank.
  def backfill(job)
    raise Webhookdb::InvalidPrecondition, "job is for different service integration" unless
      job.service_integration === self.service_integration

    raise Webhookdb::InvariantViolation, "manual backfill not supported" unless self.descriptor.supports_backfill?

    sint = self.service_integration
    raise Webhookdb::Replicator::CredentialsMissing if
      sint.backfill_key.blank? && sint.backfill_secret.blank? && sint.depends_on.blank?
    # Incremental jobs fetch only what changed since the last backfill; full jobs pass nil.
    last_backfilled = job.incremental? ? sint.last_backfilled_at : nil
    # Capture the watermark BEFORE fetching, so rows created during this run
    # are still covered by the next incremental run.
    new_last_backfilled = Time.now
    job.update(started_at: Time.now)

    backfillers = self._backfillers(**job.criteria.symbolize_keys)
    if self._parallel_backfill && self._parallel_backfill > 1
      # Create a dedicated threadpool for these backfillers,
      # with max parallelism determined by the replicator.
      pool = Concurrent::FixedThreadPool.new(self._parallel_backfill)
      # Record any errors that occur, since they won't raise otherwise.
      # Initialize a sized array to avoid any potential race conditions (though GIL should make it not an issue?).
      errors = Array.new(backfillers.size)
      backfillers.each_with_index do |bf, idx|
        pool.post do
          bf.backfill(last_backfilled)
        rescue StandardError => e
          # Stash the error by slot; it is inspected by the polling loop below.
          errors[idx] = e
        end
      end
      # We've enqueued all backfillers; do not accept anymore work.
      pool.shutdown
      loop do
        # We want to stop early if we find an error, so check for errors every 10 seconds.
        completed = pool.wait_for_termination(10)
        first_error = errors.find { |e| !e.nil? }
        if first_error.nil?
          # No error, and wait_for_termination returned true, so all work is done.
          break if completed
          # No error, but work is still going on, so loop again.
          next
        end
        # We have an error; don't run any more backfillers.
        pool.kill
        # Wait for all ongoing backfills before raising.
        pool.wait_for_termination
        raise first_error
      end
    else
      # Serial path: run each backfiller to completion, in order.
      backfillers.each do |backfiller|
        backfiller.backfill(last_backfilled)
      end
    end

    # Only incremental jobs advance the watermark; a full backfill leaves it alone.
    sint.update(last_backfilled_at: new_last_backfilled) if job.incremental?
    job.update(finished_at: Time.now)
    job.enqueue_children
  end
1043
+
1044
+ # If this replicator supports backfilling in parallel (running multiple backfillers at a time),
1045
+ # return the degree of paralellism (or nil if not running in parallel).
1046
+ # We leave parallelism up to the replicator, not CPU count, since most work
1047
+ # involves waiting on APIs to return.
1048
+ #
1049
+ # NOTE: These threads are in addition to any worker threads, so it's important
1050
+ # to pay attention to memory use.
1051
+ def _parallel_backfill
1052
+ return nil
1053
+ end
1054
+
1055
+ # Return backfillers for the replicator.
1056
+ # We must use an array for 'data-based' backfillers,
1057
+ # like when we need to paginate for each row in another table.
1058
+ #
1059
+ # By default, return a ServiceBackfiller,
1060
+ # which will call _fetch_backfill_page on the receiver.
1061
+ #
1062
+ # @return [Array<Webhookdb::Backfiller>]
1063
+ def _backfillers
1064
+ return [ServiceBackfiller.new(self)]
1065
+ end
1066
+
1067
+ # Basic backfiller that calls +_fetch_backfill_page+ on the given replicator.
1068
+ # Any timeouts or 5xx errors are automatically re-enqueued for a retry.
1069
+ # This behavior can be customized somewhat setting :backfiller_server_error_retries (default to 2)
1070
+ # and :backfiller_server_error_backoff on the replicator (default to 63 seconds),
1071
+ # though customization beyond that should use a custom backfiller.
1072
+ class ServiceBackfiller < Webhookdb::Backfiller
1073
+ # @!attribute svc
1074
+ # @return [Webhookdb::Replicator::Base]
1075
+ attr_reader :svc
1076
+
1077
+ attr_accessor :server_error_retries, :server_error_backoff
1078
+
1079
+ def initialize(svc)
1080
+ @svc = svc
1081
+ @server_error_retries = _getifrespondto(:backfiller_server_error_retries, 2)
1082
+ @server_error_backoff = _getifrespondto(:backfiller_server_error_backoff, 63.seconds)
1083
+ raise "#{svc} must implement :_fetch_backfill_page" unless svc.respond_to?(:_fetch_backfill_page)
1084
+ super()
1085
+ end
1086
+
1087
+ private def _getifrespondto(sym, default)
1088
+ return default unless @svc.respond_to?(sym)
1089
+ return @svc.send(sym)
1090
+ end
1091
+
1092
+ def handle_item(item)
1093
+ return @svc.upsert_webhook_body(item)
1094
+ end
1095
+
1096
+ def fetch_backfill_page(pagination_token, last_backfilled:)
1097
+ return @svc._fetch_backfill_page(pagination_token, last_backfilled:)
1098
+ rescue ::Timeout::Error, ::SocketError
1099
+ self.__retryordie
1100
+ rescue Webhookdb::Http::Error => e
1101
+ self.__retryordie if e.status >= 500
1102
+ raise
1103
+ end
1104
+
1105
+ def __retryordie
1106
+ raise Amigo::Retry::OrDie.new(self.server_error_retries, self.server_error_backoff)
1107
+ end
1108
+ end
1109
+
1110
+ # Called when the upstream dependency upserts. In most cases, you can noop;
1111
+ # but in some cases, you may want to update or fetch rows.
1112
+ # One example would be a 'db only' integration, where values are taken from the parent service
1113
+ # and added to this service's table. We may want to upsert rows in our table
1114
+ # whenever a row in our parent table changes.
1115
+ #
1116
+ # @param replicator [Webhookdb::Replicator::Base]
1117
+ # @param payload [Hash]
1118
+ # @param changed [Boolean]
1119
+ def on_dependency_webhook_upsert(replicator, payload, changed:)
1120
+ raise NotImplementedError, "this must be overridden for replicators that have dependencies"
1121
+ end
1122
+
1123
  # Build the state machine step that prompts the user to choose (or first create)
  # the dependency integration this replicator requires.
  #
  # @param dependency_help [String] Extra help text inserted into the prompt (may be blank).
  # @raise [Webhookdb::InvalidPrecondition] if this descriptor declares no dependency.
  # @return [Webhookdb::Replicator::StateMachineStep, nil] nil when a dependency is already linked.
  def calculate_dependency_state_machine_step(dependency_help:)
    raise Webhookdb::InvalidPrecondition, "#{self.descriptor.name} does not have a dependency" if
      self.class.descriptor.dependency_descriptor.nil?
    # Already linked to a parent integration; no prompting needed.
    return nil if self.service_integration.depends_on_id
    step = Webhookdb::Replicator::StateMachineStep.new
    dep_descr = self.descriptor.dependency_descriptor
    candidates = self.service_integration.dependency_candidates
    if candidates.empty?
      # No possible parents exist yet: tell the user how to create one, then bail out.
      step.output = %(This integration requires #{dep_descr.resource_name_plural} to sync.

You don't have any #{dep_descr.resource_name_singular} integrations yet. You can run:

  webhookdb integrations create #{dep_descr.name}

to set one up. Then once that's complete, you can re-run:

  webhookdb integrations create #{self.descriptor.name}

to keep going.
)
      step.error_code = "no_candidate_dependency"
      return step.completed
    end
    # One numbered line per candidate (1-based, matching the prompt's expected input).
    choice_lines = candidates.each_with_index.
      map { |si, idx| "#{idx + 1} - #{si.table_name}" }.
      join("\n")
    step.output = %(This integration requires #{dep_descr.resource_name_plural} to sync.
#{dependency_help.blank? ? '' : "\n#{dependency_help}\n"}
Enter the number for the #{dep_descr.resource_name_singular} integration you want to use,
or leave blank to choose the first option.

#{choice_lines}
)
    step.prompting("Parent integration number")
    step.post_to_url = self.service_integration.authed_api_path + "/transition/dependency_choice"
    return step
  end
1160
+
1161
+ def webhook_endpoint
1162
+ return self._webhook_endpoint
1163
+ end
1164
+
1165
+ protected def _webhook_endpoint
1166
+ return self.service_integration.unauthed_webhook_endpoint
1167
+ end
1168
+
1169
+ protected def _backfill_command
1170
+ return "webhookdb backfill #{self.service_integration.opaque_id}"
1171
+ end
1172
+
1173
+ protected def _query_help_output(prefix: "You can query the table")
1174
+ sint = self.service_integration
1175
+ return %(#{prefix} through your organization's Postgres connection string:
1176
+
1177
+ psql #{sint.organization.readonly_connection_url}
1178
+ > SELECT * FROM #{sint.table_name}
1179
+
1180
+ You can also run a query through the CLI:
1181
+
1182
+ webhookdb db sql "SELECT * FROM #{sint.table_name}"
1183
+ )
1184
+ end
1185
+ end