webhookdb 1.4.0 → 1.5.0

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Files changed (136)
  1. checksums.yaml +4 -4
  2. data/db/migrations/026_undo_integration_backfill_cursor.rb +2 -0
  3. data/db/migrations/032_remove_db_defaults.rb +2 -0
  4. data/db/migrations/043_text_search.rb +2 -0
  5. data/db/migrations/047_sync_parallelism.rb +9 -0
  6. data/db/migrations/048_sync_stats.rb +9 -0
  7. data/db/migrations/049_error_handlers.rb +18 -0
  8. data/db/migrations/050_logged_webhook_indices.rb +25 -0
  9. data/db/migrations/051_partitioning.rb +9 -0
  10. data/integration/async_spec.rb +0 -2
  11. data/integration/service_integrations_spec.rb +0 -2
  12. data/lib/amigo/durable_job.rb +2 -2
  13. data/lib/amigo/job_in_context.rb +12 -0
  14. data/lib/webhookdb/api/entities.rb +6 -2
  15. data/lib/webhookdb/api/error_handlers.rb +104 -0
  16. data/lib/webhookdb/api/helpers.rb +8 -1
  17. data/lib/webhookdb/api/icalproxy.rb +22 -0
  18. data/lib/webhookdb/api/install.rb +2 -1
  19. data/lib/webhookdb/api/saved_queries.rb +1 -0
  20. data/lib/webhookdb/api/saved_views.rb +1 -0
  21. data/lib/webhookdb/api/service_integrations.rb +1 -1
  22. data/lib/webhookdb/api/sync_targets.rb +1 -1
  23. data/lib/webhookdb/api/system.rb +5 -0
  24. data/lib/webhookdb/api/webhook_subscriptions.rb +1 -0
  25. data/lib/webhookdb/api.rb +4 -1
  26. data/lib/webhookdb/apps.rb +4 -0
  27. data/lib/webhookdb/async/autoscaler.rb +10 -0
  28. data/lib/webhookdb/async/job.rb +4 -0
  29. data/lib/webhookdb/async/scheduled_job.rb +4 -0
  30. data/lib/webhookdb/async.rb +2 -0
  31. data/lib/webhookdb/backfiller.rb +17 -4
  32. data/lib/webhookdb/concurrent.rb +96 -0
  33. data/lib/webhookdb/connection_cache.rb +29 -8
  34. data/lib/webhookdb/customer.rb +2 -2
  35. data/lib/webhookdb/database_document.rb +1 -1
  36. data/lib/webhookdb/db_adapter/default_sql.rb +1 -14
  37. data/lib/webhookdb/db_adapter/partition.rb +14 -0
  38. data/lib/webhookdb/db_adapter/partitioning.rb +8 -0
  39. data/lib/webhookdb/db_adapter/pg.rb +77 -5
  40. data/lib/webhookdb/db_adapter/snowflake.rb +15 -6
  41. data/lib/webhookdb/db_adapter.rb +24 -2
  42. data/lib/webhookdb/fixtures/logged_webhooks.rb +4 -0
  43. data/lib/webhookdb/fixtures/organization_error_handlers.rb +20 -0
  44. data/lib/webhookdb/http.rb +29 -15
  45. data/lib/webhookdb/icalendar.rb +30 -9
  46. data/lib/webhookdb/jobs/amigo_test_jobs.rb +1 -1
  47. data/lib/webhookdb/jobs/backfill.rb +21 -25
  48. data/lib/webhookdb/jobs/create_mirror_table.rb +3 -4
  49. data/lib/webhookdb/jobs/deprecated_jobs.rb +2 -0
  50. data/lib/webhookdb/jobs/emailer.rb +2 -1
  51. data/lib/webhookdb/jobs/front_signalwire_message_channel_sync_inbound.rb +15 -0
  52. data/lib/webhookdb/jobs/icalendar_delete_stale_cancelled_events.rb +7 -2
  53. data/lib/webhookdb/jobs/icalendar_enqueue_syncs.rb +74 -11
  54. data/lib/webhookdb/jobs/icalendar_enqueue_syncs_for_urls.rb +22 -0
  55. data/lib/webhookdb/jobs/icalendar_sync.rb +21 -9
  56. data/lib/webhookdb/jobs/increase_event_handler.rb +3 -2
  57. data/lib/webhookdb/jobs/logged_webhooks_replay.rb +5 -3
  58. data/lib/webhookdb/jobs/message_dispatched.rb +1 -0
  59. data/lib/webhookdb/jobs/model_event_system_log_tracker.rb +7 -0
  60. data/lib/webhookdb/jobs/monitor_metrics.rb +1 -1
  61. data/lib/webhookdb/jobs/organization_database_migration_notify.rb +32 -0
  62. data/lib/webhookdb/jobs/organization_database_migration_run.rb +4 -6
  63. data/lib/webhookdb/jobs/organization_error_handler_dispatch.rb +26 -0
  64. data/lib/webhookdb/jobs/prepare_database_connections.rb +1 -0
  65. data/lib/webhookdb/jobs/process_webhook.rb +11 -12
  66. data/lib/webhookdb/jobs/renew_watch_channel.rb +7 -10
  67. data/lib/webhookdb/jobs/replication_migration.rb +5 -2
  68. data/lib/webhookdb/jobs/reset_code_create_dispatch.rb +1 -2
  69. data/lib/webhookdb/jobs/scheduled_backfills.rb +2 -2
  70. data/lib/webhookdb/jobs/send_invite.rb +3 -2
  71. data/lib/webhookdb/jobs/send_test_webhook.rb +1 -3
  72. data/lib/webhookdb/jobs/send_webhook.rb +4 -5
  73. data/lib/webhookdb/jobs/stale_row_deleter.rb +31 -0
  74. data/lib/webhookdb/jobs/sync_target_enqueue_scheduled.rb +3 -0
  75. data/lib/webhookdb/jobs/sync_target_run_sync.rb +9 -15
  76. data/lib/webhookdb/jobs/webhook_subscription_delivery_event.rb +5 -8
  77. data/lib/webhookdb/liquid/expose.rb +1 -1
  78. data/lib/webhookdb/liquid/filters.rb +1 -1
  79. data/lib/webhookdb/liquid/partial.rb +2 -2
  80. data/lib/webhookdb/logged_webhook/resilient.rb +3 -3
  81. data/lib/webhookdb/logged_webhook.rb +16 -2
  82. data/lib/webhookdb/message/email_transport.rb +1 -1
  83. data/lib/webhookdb/message.rb +2 -2
  84. data/lib/webhookdb/messages/error_generic_backfill.rb +2 -0
  85. data/lib/webhookdb/messages/error_icalendar_fetch.rb +2 -0
  86. data/lib/webhookdb/messages/error_signalwire_send_sms.rb +2 -0
  87. data/lib/webhookdb/organization/alerting.rb +50 -4
  88. data/lib/webhookdb/organization/database_migration.rb +1 -1
  89. data/lib/webhookdb/organization/db_builder.rb +4 -3
  90. data/lib/webhookdb/organization/error_handler.rb +141 -0
  91. data/lib/webhookdb/organization.rb +62 -9
  92. data/lib/webhookdb/postgres/model_utilities.rb +2 -0
  93. data/lib/webhookdb/postgres.rb +1 -3
  94. data/lib/webhookdb/replicator/base.rb +136 -29
  95. data/lib/webhookdb/replicator/base_stale_row_deleter.rb +165 -0
  96. data/lib/webhookdb/replicator/email_octopus_contact_v1.rb +0 -1
  97. data/lib/webhookdb/replicator/fake.rb +100 -88
  98. data/lib/webhookdb/replicator/front_signalwire_message_channel_app_v1.rb +105 -44
  99. data/lib/webhookdb/replicator/github_repo_v1_mixin.rb +17 -0
  100. data/lib/webhookdb/replicator/icalendar_calendar_v1.rb +144 -23
  101. data/lib/webhookdb/replicator/icalendar_event_v1.rb +20 -44
  102. data/lib/webhookdb/replicator/icalendar_event_v1_partitioned.rb +33 -0
  103. data/lib/webhookdb/replicator/intercom_contact_v1.rb +1 -0
  104. data/lib/webhookdb/replicator/intercom_conversation_v1.rb +1 -0
  105. data/lib/webhookdb/replicator/intercom_v1_mixin.rb +24 -2
  106. data/lib/webhookdb/replicator/partitionable_mixin.rb +116 -0
  107. data/lib/webhookdb/replicator/shopify_v1_mixin.rb +1 -1
  108. data/lib/webhookdb/replicator/signalwire_message_v1.rb +1 -2
  109. data/lib/webhookdb/replicator/sponsy_v1_mixin.rb +1 -1
  110. data/lib/webhookdb/replicator/transistor_episode_stats_v1.rb +0 -1
  111. data/lib/webhookdb/replicator.rb +4 -1
  112. data/lib/webhookdb/service/helpers.rb +4 -0
  113. data/lib/webhookdb/service/middleware.rb +6 -2
  114. data/lib/webhookdb/service_integration.rb +5 -0
  115. data/lib/webhookdb/signalwire.rb +1 -1
  116. data/lib/webhookdb/spec_helpers/async.rb +0 -4
  117. data/lib/webhookdb/spec_helpers/sentry.rb +32 -0
  118. data/lib/webhookdb/spec_helpers/shared_examples_for_replicators.rb +87 -1
  119. data/lib/webhookdb/spec_helpers.rb +1 -0
  120. data/lib/webhookdb/sync_target.rb +195 -29
  121. data/lib/webhookdb/tasks/admin.rb +1 -1
  122. data/lib/webhookdb/tasks/annotate.rb +1 -1
  123. data/lib/webhookdb/tasks/db.rb +13 -1
  124. data/lib/webhookdb/tasks/docs.rb +1 -1
  125. data/lib/webhookdb/tasks/fixture.rb +1 -1
  126. data/lib/webhookdb/tasks/message.rb +1 -1
  127. data/lib/webhookdb/tasks/regress.rb +1 -1
  128. data/lib/webhookdb/tasks/release.rb +1 -1
  129. data/lib/webhookdb/tasks/sidekiq.rb +1 -1
  130. data/lib/webhookdb/tasks/specs.rb +1 -1
  131. data/lib/webhookdb/version.rb +1 -1
  132. data/lib/webhookdb/webhook_subscription.rb +2 -3
  133. data/lib/webhookdb.rb +3 -1
  134. metadata +88 -54
  135. data/lib/webhookdb/jobs/organization_database_migration_notify_finished.rb +0 -21
  136. data/lib/webhookdb/jobs/organization_database_migration_notify_started.rb +0 -21
@@ -0,0 +1,96 @@ (new file: data/lib/webhookdb/concurrent.rb)
+# frozen_string_literal: true
+
+module Webhookdb::Concurrent
+  class Timeout < Timeout::Error; end
+
+  # Base class for pools for doing work across threads.
+  # Note that these concurrent pools are not for repeated use,
+  # like a normal threadpool. They are for 'fanning out' a single operation
+  # across multiple threads.
+  #
+  # Tasks should not error; if they error, the pool becomes inoperable:
+  # +post+ and +join+ will re-raise the first task error.
+  class Pool
+    # Add work to the pool.
+    # Will block if no workers are free.
+    # Re-raises the pool's error if the pool has an error.
+    # This is important as we don't want the caller to keep adding work
+    # if the pool is inoperable.
+    def post(&) = raise NotImplementedError
+
+    # Wait for all work to finish.
+    # Re-raise the first exception for any pool error.
+    def join = raise NotImplementedError
+  end
+
+  # Fake +Pool+ that does work in the calling thread,
+  # but behaves like a threaded pool (ie, tasks do not raise).
+  class SerialPool < Pool
+    def post
+      raise @exception if @exception
+      begin
+        yield
+      rescue StandardError => e
+        @exception = e
+      end
+    end
+
+    def join
+      raise @exception if @exception
+    end
+  end
+
+  # Pool that does work across a given number of threads.
+  # +queue_size+ is how many items can be in the queue before +post+ blocks.
+  # +threads+ defaults to +queue_size+, allowing at most +queue_size+ concurrent work,
+  # which fits the idea of a parallelized pool well.
+  #
+  # If you want the calling thread to queue up a bunch of work ahead of time,
+  # you can use a +Concurrent::ThreadPoolExecutor+. This pool will not allow the enqueueing of more work
+  # while the queue is full.
+  class ParallelizedPool < Pool
+    def initialize(queue_size, timeout: nil, threads: nil)
+      super()
+      threads ||= queue_size
+      @timeout = timeout
+      @threads = (1..threads).map do
+        Thread.new do
+          loop { break unless self.do_work }
+        end
+      end
+      @queue = Thread::SizedQueue.new(queue_size)
+      @exception = nil
+    end
+
+    protected def do_work
+      task = @queue.pop
+      return false if task.nil?
+      if task == STOP
+        @queue.close
+        return false
+      end
+      begin
+        task.call
+      rescue StandardError => e
+        @exception ||= e
+        return false
+      end
+      return true
+    end
+
+    def post(&task)
+      raise @exception if @exception
+      added = @queue.push(task, timeout: @timeout)
+      raise Timeout, "waited #{@timeout} to add to the queue" if added.nil?
+      return true
+    end
+
+    def join
+      @queue.push(STOP)
+      @threads.each(&:join)
+      raise @exception if @exception
+    end
+
+    STOP = :stop
+  end
+end
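A minimal usage sketch (not from the gem itself): fan a batch of feed fetches across four worker threads. `urls` and `fetch_feed` are hypothetical; the pool API is exactly the one defined above.

    pool = Webhookdb::Concurrent::ParallelizedPool.new(4, timeout: 30)
    urls.each do |url|
      # Blocks while the queue is full; re-raises if a task has already failed.
      pool.post { fetch_feed(url) }
    end
    pool.join # waits for the workers, re-raising the first task error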
@@ -140,20 +140,41 @@ class Webhookdb::ConnectionCache
       conn ||= take_conn(url, single_threaded: true, extensions: [:pg_json, :pg_streaming])
       db_loans[:loaned][t] = conn
     end
-    conn << "SET statement_timeout TO #{timeout * 1000}" if timeout.present?
-    conn << "BEGIN;" if transaction
+    trash_conn = false
     begin
-      result = yield conn
-      conn << "COMMIT;" if transaction
-    rescue Sequel::DatabaseError
-      conn << "ROLLBACK;" if transaction
+      # All database operations need global handling to ensure proper pool management.
+      conn << "SET statement_timeout TO #{timeout * 1000}" if timeout.present?
+      conn << "BEGIN;" if transaction
+      begin
+        result = yield conn
+        conn << "COMMIT;" if transaction
+      rescue Sequel::DatabaseError => e
+        # Roll back on any database error; but if we're disconnected, don't bother
+        # since we know the rollback won't reach the database.
+        conn << "ROLLBACK;" if transaction && !e.is_a?(Sequel::DatabaseDisconnectError)
+        raise
+      end
+    rescue Sequel::DatabaseDisconnectError
+      # If we're disconnected, trash this connection rather than re-adding it back to the pool.
+      trash_conn = true
      raise
    ensure
-      conn << "SET statement_timeout TO 0" if timeout.present?
+      reraise = nil
+      if timeout.present?
+        begin
+          # If the timeout reset fails for whatever reason, assume the connection is toast
+          # and don't return it to the pool.
+          conn << "SET statement_timeout TO 0"
+        rescue Sequel::DatabaseError => e
+          reraise = e
+          trash_conn = true
+        end
+      end
      @mutex.synchronize do
        @dbs_for_urls[url][:loaned].delete(t)
-        @dbs_for_urls[url][:available] << Available.new(conn, Time.now)
+        @dbs_for_urls[url][:available] << Available.new(conn, Time.now) unless trash_conn
      end
+      raise reraise if reraise
    end
    self.prune(url) if now > self.next_prune_at
    return result
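Distilled to its shape (a sketch with hypothetical names, not the ConnectionCache API): roll back on database errors, but never return a disconnected or otherwise broken connection to the pool.

    def run_borrowed(conn, pool)
      trash = false
      begin
        conn << "BEGIN;"
        result = yield conn
        conn << "COMMIT;"
        result
      rescue Sequel::DatabaseDisconnectError
        # A rollback can't reach the server after a disconnect, so skip it and drop the conn.
        trash = true
        raise
      rescue Sequel::DatabaseError
        conn << "ROLLBACK;"
        raise
      ensure
        pool.checkin(conn) unless trash # `pool.checkin` stands in for the cache bookkeeping
      end
    end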
@@ -16,9 +16,9 @@ class Webhookdb::Customer < Webhookdb::Postgres::Model(:customers)
   class SignupDisabled < Webhookdb::WebhookdbError; end

   configurable(:customer) do
-    setting :signup_email_allowlist, ["*"], convert: ->(s) { s.split }
+    setting :signup_email_allowlist, ["*"], convert: lambda(&:split)
     setting :skip_authentication, false
-    setting :skip_authentication_allowlist, [], convert: ->(s) { s.split }
+    setting :skip_authentication_allowlist, [], convert: lambda(&:split)
   end

   # The bcrypt hash cost. Changing this would invalidate all passwords!
@@ -10,7 +10,7 @@ class Webhookdb::DatabaseDocument < Webhookdb::Postgres::Model(:database_documen
   include Appydays::Configurable
   configurable(:database_document) do
     setting :skip_authentication, false
-    setting :skip_authentication_allowlist, [], convert: ->(s) { s.split }
+    setting :skip_authentication_allowlist, [], convert: lambda(&:split)
   end

   plugin :column_encryption do |enc|
@@ -8,20 +8,7 @@ module Webhookdb::DBAdapter::DefaultSql
     return s
   end

-  def create_table_sql(table, columns, if_not_exists: false)
-    createtable = +"CREATE TABLE "
-    createtable << "IF NOT EXISTS " if if_not_exists
-    createtable << self.qualify_table(table)
-    lines = ["#{createtable} ("]
-    columns[0...-1]&.each { |c| lines << "  #{self.column_create_sql(c)}," }
-    lines << "  #{self.column_create_sql(columns.last)}"
-    lines << ")"
-    return lines.join("\n")
-  end
-
-  def identifier_quote_char
-    raise NotImplementedError
-  end
+  def identifier_quote_char = raise NotImplementedError

   # We write our own escaper because we want to only escape what's needed;
   # otherwise we want to avoid quoting identifiers.
@@ -0,0 +1,14 @@ (new file: data/lib/webhookdb/db_adapter/partition.rb)
+# frozen_string_literal: true
+
+class Webhookdb::DBAdapter::Partition < Webhookdb::TypedStruct
+  attr_reader :parent_table, :partition_name, :suffix
+
+  def initialize(**kwargs)
+    super
+    self.typecheck!(:parent_table, Webhookdb::DBAdapter::Table)
+    self.typecheck!(:partition_name, Symbol)
+    self.typecheck!(:suffix, Symbol)
+  end
+
+  def partition_table = Webhookdb::DBAdapter::Table.new(name: self.partition_name, schema: self.parent_table.schema)
+end
@@ -0,0 +1,8 @@ (new file: data/lib/webhookdb/db_adapter/partitioning.rb)
+# frozen_string_literal: true
+
+class Webhookdb::DBAdapter::Partitioning < Webhookdb::TypedStruct
+  HASH = :hash
+  RANGE = :range
+
+  attr_reader :by, :column
+end
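A hedged construction sketch for the two new value types (names are made up; `parent` is assumed to be a `Webhookdb::DBAdapter::Table`, which `Partition` typechecks above):

    partition = Webhookdb::DBAdapter::Partition.new(
      parent_table: parent,
      partition_name: :icalendar_event_v1_partitioned_0,
      suffix: :_0,
    )
    partition.partition_table.name # => :icalendar_event_v1_partitioned_0

    partitioning = Webhookdb::DBAdapter::Partitioning.new(
      by: Webhookdb::DBAdapter::Partitioning::HASH,
      column: :calendar_external_id,
    )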
@@ -11,9 +11,7 @@ class Webhookdb::DBAdapter::PG < Webhookdb::DBAdapter
   VERIFY_TIMEOUT = 2
   VERIFY_STATEMENT = "SELECT 1"

-  def identifier_quote_char
-    return '"'
-  end
+  def identifier_quote_char = '"'

   def create_index_sql(index, concurrently:)
     tgts = index.targets.map { |c| self.escape_identifier(c.name) }.join(", ")
@@ -26,7 +24,72 @@ class Webhookdb::DBAdapter::PG < Webhookdb::DBAdapter
     return "CREATE#{uniq} INDEX#{concurrent} IF NOT EXISTS #{idxname} ON #{tblname} (#{tgts})#{where}"
   end

-  def column_create_sql(column)
+  def create_index_sqls(index, concurrently:, partitions: [])
+    return super if partitions.empty?
+    result = []
+    result << self.create_index_sql(index, concurrently: false).gsub(" ON ", " ON ONLY ")
+    partitions.each do |partition|
+      partition_idx = index.change(table: partition.partition_table, name: "#{index.name}#{partition.suffix}")
+      result << self.create_index_sql(partition_idx, concurrently:)
+      result << "ALTER INDEX #{index.name} ATTACH PARTITION #{partition_idx.name}"
+    end
+    return result
+  end
+
+  def create_table_sql(table, columns, if_not_exists: false, partition: nil)
+    columns = columns.to_a
+    createtable = +"CREATE TABLE "
+    createtable << "IF NOT EXISTS " if if_not_exists
+    createtable << self.qualify_table(table)
+
+    partitioned_pks = []
+    partitioned_uniques = []
+    if partition
+      # We cannot use PRIMARY KEY or UNIQUE when partitioning,
+      # so set those columns as if they're not
+      columns.each_with_index do |c, i|
+        if c.pk?
+          # Set the type to the serial type as if it's a normal PK
+          type = case c.type
+            when BIGINT
+              :bigserial
+            when INTEGER
+              :serial
+            else
+              c.type
+          end
+          columns[i] = c.change(pk: false, type:)
+          partitioned_pks << c
+        elsif c.unique?
+          columns[i] = c.change(unique: false)
+          partitioned_uniques << c
+        end
+      end
+    end
+    tbl_lines = columns.map { |c| self.create_column_sql(c) }
+    tbl_lines.concat(partitioned_pks.map do |c|
+      pkcols = [partition.column, c.name].uniq.join(", ")
+      "PRIMARY KEY (#{pkcols})"
+    end)
+    tbl_lines.concat(partitioned_uniques.map { |c| "UNIQUE (#{partition.column}, #{c.name})" })
+    lines = ["#{createtable} ("]
+    lines << ("  " + tbl_lines.join(",\n  "))
+    lines << ")"
+    if partition
+      m = case partition.by
+        when Webhookdb::DBAdapter::Partitioning::HASH
+          "HASH"
+        when Webhookdb::DBAdapter::Partitioning::RANGE
+          "RANGE"
+        else
+          raise ArgumentError, "unknown partition method: #{partition.by}"
+      end
+      lines << "PARTITION BY #{m} (#{partition.column})"
+    end
+    return lines.join("\n")
+  end
+
+  def create_column_sql(column)
     modifiers = +""
     coltype = COLTYPE_MAP.fetch(column.type)
     if column.pk?
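To make the partitioned branch concrete, here is roughly the SQL it emits for a table with a `bigint` primary key `pk`, hash-partitioned on `external_id` (names hypothetical; exact column modifiers depend on `create_column_sql`):

    CREATE TABLE public.mytable (
      pk bigserial,
      external_id text,
      PRIMARY KEY (external_id, pk)
    )
    PARTITION BY HASH (external_id)

The primary key is widened to include the partition column because PostgreSQL requires unique constraints on a partitioned table to include the partition key.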
@@ -42,8 +105,15 @@ class Webhookdb::DBAdapter::PG < Webhookdb::DBAdapter
     return "#{colname} #{coltype}#{modifiers}"
   end

+  def create_hash_partition_sql(table, partition_count, remainder)
+    tbl = self.qualify_table(table)
+    s = "CREATE TABLE #{tbl}_#{remainder} PARTITION OF #{tbl} " \
+      "FOR VALUES WITH (MODULUS #{partition_count}, REMAINDER #{remainder})"
+    return s
+  end
+
   def add_column_sql(table, column, if_not_exists: false)
-    c = self.column_create_sql(column)
+    c = self.create_column_sql(column)
     ifne = if_not_exists ? " IF NOT EXISTS" : ""
     return "ALTER TABLE #{self.qualify_table(table)} ADD COLUMN#{ifne} #{c}"
   end
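`create_hash_partition_sql` then creates each slice of the hash space; for example (table name hypothetical), partition 2 of 4 comes out as:

    CREATE TABLE public.mytable_2 PARTITION OF public.mytable FOR VALUES WITH (MODULUS 4, REMAINDER 2)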
@@ -92,5 +162,7 @@ class Webhookdb::DBAdapter::PG < Webhookdb::DBAdapter
     TEXT_ARRAY => "text[]",
     TIMESTAMP => "timestamptz",
     UUID => "uuid",
+    :serial => "serial",
+    :bigserial => "bigserial",
   }.freeze
 end
@@ -28,10 +28,21 @@ class Webhookdb::DBAdapter::Snowflake < Webhookdb::DBAdapter
   end

   def create_index_sql(*)
-    raise NotImplementedError, "Snowflake does not support indices"
+    raise Webhookdb::InvalidPrecondition, "Snowflake does not support indices"
   end

-  def column_create_sql(column)
+  def create_table_sql(table, columns, if_not_exists: false, **)
+    createtable = +"CREATE TABLE "
+    createtable << "IF NOT EXISTS " if if_not_exists
+    createtable << self.qualify_table(table)
+    lines = ["#{createtable} ("]
+    columns[0...-1]&.each { |c| lines << "  #{self.create_column_sql(c)}," }
+    lines << "  #{self.create_column_sql(columns.last)}"
+    lines << ")"
+    return lines.join("\n")
+  end
+
+  def create_column_sql(column)
     modifiers = +""
     if column.unique?
       modifiers << " UNIQUE NOT NULL"
@@ -44,7 +55,7 @@ class Webhookdb::DBAdapter::Snowflake < Webhookdb::DBAdapter
   end

   def add_column_sql(table, column, if_not_exists: false)
-    c = self.column_create_sql(column)
+    c = self.create_column_sql(column)
     # Snowflake has no 'ADD COLUMN IF NOT EXISTS' so we need to query the long way around
     add_sql = "ALTER TABLE #{self.qualify_table(table)} ADD COLUMN #{c}"
     return add_sql unless if_not_exists
@@ -118,9 +129,7 @@ class Webhookdb::DBAdapter::Snowflake < Webhookdb::DBAdapter
     conn.execute(statement)
   end

-  def identifier_quote_char
-    return ""
-  end
+  def identifier_quote_char = ""

   COLTYPE_MAP = {
     BIGINT => "bigint",
@@ -2,6 +2,8 @@

 class Webhookdb::DBAdapter
   require "webhookdb/db_adapter/column_types"
+  require "webhookdb/db_adapter/partition"
+  require "webhookdb/db_adapter/partitioning"

   class UnsupportedAdapter < Webhookdb::ProgrammingError; end
@@ -149,21 +151,41 @@ class Webhookdb::DBAdapter
     raise NotImplementedError
   end

+  # Return the CREATE TABLE sql to create table with columns.
   # @param [Table] table
   # @param [Array<Column>] columns
   # @param [Schema] schema
-  # @param [Boolean] if_not_exists
+  # @param [TrueClass,FalseClass] if_not_exists If true, use CREATE TABLE IF NOT EXISTS.
+  # @param partition [Webhookdb::DBAdapter::Partitioning,nil] If provided,
+  #   adds a "PARTITION BY HASH (partition_column_name)" to the returned SQL.
   # @return [String]
-  def create_table_sql(table, columns, schema: nil, if_not_exists: false)
+  def create_table_sql(table, columns, schema: nil, if_not_exists: false, partition: nil)
     raise NotImplementedError
   end

+  # We write our own escaper because we want to only escape what's needed;
+  # otherwise we want to avoid quoting identifiers.
+  def escape_identifier(s) = raise NotImplementedError
+
   # @param [Index] index
   # @return [String]
   def create_index_sql(index, concurrently:)
     raise NotImplementedError
   end

+  # Create indices, including for partitions.
+  # By default, just call create_index_sql and return it in a single-item array.
+  # Override if creating indices while using partitions requires extra logic.
+  # @param partitions [Array<Webhookdb::DBAdapter::Partition>]
+  # @return [Array<String>]
+  def create_index_sqls(index, concurrently:, partitions: [])
+    _ = partitions
+    return [self.create_index_sql(index, concurrently:)]
+  end
+
+  # @param column [Column] The column to create SQL for.
+  def create_column_sql(column) = raise NotImplementedError
+
   # @param [Table] table
   # @param [Column] column
   # @param [Boolean] if_not_exists
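For the PG override earlier in this diff, the partitioned case expands one logical index into a parent index created ON ONLY the partitioned table, plus a per-partition index attached to it. A sketched expansion for a single partition (index, column, and table names hypothetical):

    CREATE INDEX IF NOT EXISTS at_idx ON ONLY public.mytable (at)
    CREATE INDEX CONCURRENTLY IF NOT EXISTS at_idx_0 ON public.mytable_0 (at)
    ALTER INDEX at_idx ATTACH PARTITION at_idx_0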
@@ -35,6 +35,10 @@ module Webhookdb::Fixtures::LoggedWebhooks
     self.response_status = rand(400..599)
   end

+  decorator :truncated do |t=Time.now|
+    self.truncated_at = t
+  end
+
   decorator :with_organization do |org={}|
     org = Webhookdb::Fixtures.organization.create(org) unless org.is_a?(Webhookdb::Organization)
     self.organization = org
@@ -0,0 +1,20 @@ (new file: data/lib/webhookdb/fixtures/organization_error_handlers.rb)
+# frozen_string_literal: true
+
+require "faker"
+
+require "webhookdb/fixtures"
+
+module Webhookdb::Fixtures::OrganizationErrorHandlers
+  extend Webhookdb::Fixtures
+
+  fixtured_class Webhookdb::Organization::ErrorHandler
+
+  base :organization_error_handler do
+    self.url ||= Faker::Internet.url
+  end
+
+  before_saving do |instance|
+    instance.organization ||= Webhookdb::Fixtures.organization.create
+    instance
+  end
+end
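A minimal spec-side usage sketch, following the same fluent factory pattern as the `Webhookdb::Fixtures.organization.create` calls visible in this diff:

    handler = Webhookdb::Fixtures.organization_error_handler.create
    handler.url          # a random Faker URL, unless one was passed in
    handler.organization # a freshly created organization, unless one was assigned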
@@ -2,6 +2,7 @@

 require "appydays/configurable"
 require "appydays/loggable/httparty_formatter"
+require "down/httpx"
 require "httparty"

 module Webhookdb::Http
@@ -96,25 +97,38 @@ module Webhookdb::Http
     options[:log_level] = self.log_level
   end

-  # Convenience wrapper around Down that handles gzip.
+  # Convenience wrapper around Down so we can use our preferred implementation.
+  # See commit history for more info.
   # @return Array<Down::ChunkedIO, IO> Tuple
   def self.chunked_download(request_url, rewindable: false, **down_kw)
-    io = Down::NetHttp.open(request_url, rewindable:, **down_kw)
-    if io.data[:headers].fetch("Content-Encoding", "").include?("gzip")
-      # If the response is gzipped, Down doesn't handle it properly.
-      # Wrap it with gzip reader, and force the encoding to binary
-      # the server may send back a header like Content-Type: text/plain; UTF-8,
-      # so each line Down yields via #gets will have force_encoding('utf-8').
-      # https://github.com/janko/down/issues/87
-      io.instance_variable_set(:@encoding, "binary")
-      io = Zlib::GzipReader.wrap(io)
-    end
+    uri = URI(request_url)
+    raise URI::InvalidURIError, "#{request_url} must be an http/s url" unless ["http", "https"].include?(uri.scheme)
+    down_kw[:headers] ||= {}
+    down_kw[:headers]["User-Agent"] ||= self.user_agent
+    io = Down::Httpx.open(uri, rewindable:, **down_kw)
     return io
   end
+end
+
+class Down::Httpx
+  alias _original_response_error! response_error!
+  def response_error!(response)
+    # For some reason, Down's httpx backend uses TooManyRedirects for every status code...
+    raise Down::NotModified if response.status == 304
+    return self._original_response_error!(response)
+  end
+end

-  def self.gzipped?(string)
-    return false if string.length < 3
-    b = string[..2].bytes
-    return b[0] == 0x1f && b[1] == 0x8b
+class HTTPX::Response::Body
+  alias _original_initialize initialize
+  def initialize(*)
+    _original_initialize(*)
+    # If the encoding is an invalid one like 'utf8' vs 'utf-8', modify what was in the charset.
+    # See https://github.com/HoneyryderChuck/httpx/issues/66
+    return unless @encoding.is_a?(String) && (md = @encoding.match(/^(utf)(\d+)$/))
+    @encoding = "#{md[1]}-#{md[2]}"
   end
 end
+
+# Not sure why, but Down uses this, loads the plugin, but the constant isn't defined.
+require "httpx/plugins/follow_redirects"
@@ -7,20 +7,41 @@ module Webhookdb::Icalendar
   # If a manual backfill is attempted, direct customer to this url.
   DOCUMENTATION_URL = "https://docs.webhookdb.com/guides/icalendar/"

+  EVENT_REPLICATORS = ["icalendar_event_v1", "icalendar_event_v1_partitioned"].freeze
+
   include Appydays::Configurable

   configurable(:icalendar) do
-    # Do not store events older then this when syncing recurring events.
+    # Do not store events older than this when syncing recurring events.
     # Many icalendar feeds are misconfigured and this prevents enumerating 2000+ years of recurrence.
-    setting :oldest_recurring_event, "1990-01-01", convert: ->(s) { Date.parse(s) }
-    # Sync icalendar calendars only this often.
-    # Most services only update every day or so. Assume it takes 5s to sync each feed (request, parse, upsert).
-    # If you have 10,000 feeds, that is 50,000 seconds, or almost 14 hours of processing time,
-    # or two threads for 7 hours. The resyncs are spread out across the sync period
-    # (ie, no thundering herd every 8 hours), but it is still a good idea to sync as infrequently as possible.
+    setting :oldest_recurring_event, "2000-01-01", convert: ->(s) { Date.parse(s) }
+    # Calendar feeds are considered 'fresh' if they have been synced this long ago or less.
+    # Most services only update every day or so.
+    # Assume it takes 5s to sync each feed (request, parse, upsert).
+    # If you have 10,000 feeds, that is 50,000 seconds,
+    # or almost 14 hours of processing time, or two threads for 7 hours.
     setting :sync_period_hours, 6
+    # When stale feeds are scheduled for a resync,
+    # 'smear' them along this duration. Using 0 would immediately enqueue syncs of all stale feeds,
+    # which could saturate the job server. The number here means that feeds will be refreshed between every
+    # +sync_period_hours+ and +sync_period_hours+ + +sync_period_splay_hours+.
+    setting :sync_period_splay_hours, 1
+    # Number of threads for the 'precheck' threadpool, used when enqueueing icalendar sync jobs.
+    # Since the precheck process uses many threads, but each check is resource-light and not latency-sensitive,
+    # we use a shared threadpool for it.
+    setting :precheck_feed_change_pool_size, 12
+
+    # Cancelled events that were last updated this long ago are deleted from the database.
+    setting :stale_cancelled_event_threshold_days, 20
+    # The stale row deleter job will look for rows this far before the threshold.
+    setting :stale_cancelled_event_lookback_days, 3

-    # Cancelled events that were cancelled this long ago are deleted from the database.
-    setting :stale_cancelled_event_threshold_days, 35
+    # The URL of the icalproxy server, if using one.
+    # See https://github.com/webhookdb/icalproxy for more info.
+    # Used to get proper HTTP semantics for any icalendar feed, like Etag and HEAD requests.
+    setting :proxy_url, ""
+    # Api key of the icalproxy server, if using one.
+    # See https://github.com/webhookdb/icalproxy
+    setting :proxy_api_key, ""
   end
 end
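An illustrative sketch (not the scheduler's actual code) of what the splay setting means: each stale feed gets a random delay inside the splay window, so a feed re-syncs somewhere between `sync_period_hours` and `sync_period_hours + sync_period_splay_hours` after its last sync.

    splay_seconds = Webhookdb::Icalendar.sync_period_splay_hours * 3600
    delay = rand(0..splay_seconds)
    # enqueue the per-feed sync job to run `delay` seconds from now,
    # once the feed has gone longer than sync_period_hours without a sync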
@@ -103,7 +103,7 @@ class Webhookdb::Jobs::RetryChecker
     when "die"
       raise Amigo::Retry::Die
     else
-      raise Amigo::Retry::Die.new(attempts, interval)
+      raise Amigo::Retry::OrDie.new(attempts, interval)
     end
   end
 end
@@ -13,40 +13,36 @@ class Webhookdb::Jobs::Backfill
   on "webhookdb.backfilljob.run"
   sidekiq_options queue: "netout"

-  def dependent_queues
-    # This is really the lowest-priority job so always defer to other queues.
-    return super
-  end
+  # This is really the lowest-priority job so always defer to other queues.

   def _perform(event)
     begin
       bfjob = self.lookup_model(Webhookdb::BackfillJob, event.payload)
     rescue RuntimeError => e
-      self.logger.info "skipping_missing_backfill_job", error: e
+      self.set_job_tags(result: "skipped_missing_backfill_job", exception: e)
       return
     end
     sint = bfjob.service_integration
     bflock = bfjob.ensure_service_integration_lock
-    self.with_log_tags(sint.log_tags.merge(backfill_job_id: bfjob.opaque_id)) do
-      sint.db.transaction do
-        unless bflock.lock?
-          self.logger.info "skipping_locked_backfill_job"
-          bfjob.update(finished_at: Time.now)
-          break
-        end
-        bfjob.refresh
-        if bfjob.finished?
-          self.logger.info "skipping_finished_backfill_job"
-          break
-        end
-        begin
-          sint.replicator.backfill(bfjob)
-        rescue Webhookdb::Replicator::CredentialsMissing
-          # The credentials could have been cleared out, so just finish this job.
-          self.logger.info "skipping_backfill_job_without_credentials"
-          bfjob.update(finished_at: Time.now)
-          break
-        end
+    self.set_job_tags(sint.log_tags.merge(backfill_job_id: bfjob.opaque_id))
+    sint.db.transaction do
+      unless bflock.lock?
+        self.set_job_tags(result: "skipped_locked_backfill_job")
+        bfjob.update(finished_at: Time.now)
+        break
+      end
+      bfjob.refresh
+      if bfjob.finished?
+        self.set_job_tags(result: "skipped_finished_backfill_job")
+        break
+      end
+      begin
+        sint.replicator.backfill(bfjob)
+      rescue Webhookdb::Replicator::CredentialsMissing
+        # The credentials could have been cleared out, so just finish this job.
+        self.set_job_tags(result: "skipped_backfill_job_without_credentials")
+        bfjob.update(finished_at: Time.now)
+        break
       end
     end
   end
@@ -10,9 +10,8 @@ class Webhookdb::Jobs::CreateMirrorTable

   def _perform(event)
     sint = self.lookup_model(Webhookdb::ServiceIntegration, event)
-    self.with_log_tags(sint.log_tags) do
-      svc = Webhookdb::Replicator.create(sint)
-      svc.create_table(if_not_exists: true)
-    end
+    self.set_job_tags(sint.log_tags)
+    svc = Webhookdb::Replicator.create(sint)
+    svc.create_table(if_not_exists: true)
   end
 end