@haathie/pgmb 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1018 @@
1
SET search_path TO pgmb;

-- pgmb configuration storage -------------------------------

-- The closed set of settings pgmb understands. Each label can be the
-- primary key of at most one row in "config".
CREATE TYPE config_type AS ENUM (
  'plugin_version',
  -- retention window: partitions older than this interval are deleted
  'partition_retention_period',
  -- how far ahead of the current time partitions get created
  'future_intervals_to_create',
  'partition_interval',
  'poll_chunk_size'
);

-- One row per setting; values are kept as text and cast by the reader.
CREATE TABLE IF NOT EXISTS config (
  -- which setting this row holds
  id config_type PRIMARY KEY,
  -- the setting's value, as text
  value TEXT
);
21
+
22
-- Returns the text value stored in "config" for the given setting.
-- Yields NULL when config_id is NULL (STRICT) or when the setting has no row.
CREATE OR REPLACE FUNCTION get_config_value(
  config_id config_type
) RETURNS TEXT
LANGUAGE sql
STRICT STABLE PARALLEL SAFE
SET search_path TO pgmb
AS $$
  SELECT c.value
  FROM config AS c
  WHERE c.id = config_id
$$;
27
+
28
-- Seed the default settings. Interval-like values are stored as text and
-- cast to INTERVAL/INT by the functions that read them.
INSERT INTO config (id, value)
VALUES
  ('plugin_version',             '0.2.0'),
  ('partition_retention_period', '60 minutes'),
  ('future_intervals_to_create', '120 minutes'),
  ('partition_interval',         '30 minutes'),
  ('poll_chunk_size',            '10000');
35
+
36
+ -- we'll create the events table next & its functions ---------------
37
+
38
-- Identifier type for events: a string of at most 24 characters.
CREATE DOMAIN event_id AS CHARACTER VARYING(24);
39
+
40
-- Returns a random BIGINT between 0 and 0xffffff8 (inclusive), obtained by
-- scaling random() and rounding.
-- The bound is 2^28 - 8, so the value always fits in 7 hex digits.
-- NOTE(review): the previous comment described the bound as "2^28/2" so that
-- batch inserts can keep incrementing the value; the literal used is ~2^28.
-- Confirm which of the two is intended.
CREATE OR REPLACE FUNCTION create_random_bigint()
RETURNS BIGINT
LANGUAGE plpgsql
VOLATILE PARALLEL SAFE
AS $$
DECLARE
  max_seed CONSTANT BIGINT := 0xffffff8;
BEGIN
  RETURN (random() * max_seed)::BIGINT;
END
$$;
51
+
52
-- Builds a timestamped event ID, always 24 characters long:
--   1. the literal prefix 'pm'
--   2. the timestamp in microseconds since the epoch, as hex, left-padded
--      with '0' to 13 characters
--   3. the hex form of "rand", right-padded with '0' to 9 characters
-- The concatenation is cut to its first 24 characters.
-- NOTE(review): because "rand" is right-padded, values that differ only by
-- trailing hex zeros (e.g. 0x1 and 0x10) produce the same suffix.
-- Returns NULL if either argument is NULL (STRICT).
CREATE OR REPLACE FUNCTION create_event_id(ts timestamptz, rand bigint)
RETURNS event_id
LANGUAGE sql
IMMUTABLE STRICT PARALLEL SAFE SECURITY DEFINER
SET search_path TO pgmb
AS $$
  SELECT left(parts.prefix || parts.ts_hex || parts.rand_hex, 24)
  FROM (
    SELECT
      'pm'::text AS prefix,
      lpad(to_hex((extract(epoch from ts) * 1000000)::bigint), 13, '0') AS ts_hex,
      rpad(to_hex(rand), 9, '0') AS rand_hex
  ) AS parts
$$;
71
+
72
-- Default generator for event IDs: combines the current wall-clock time
-- (clock_timestamp(), not the transaction start) with a fresh random value.
CREATE OR REPLACE FUNCTION create_event_id_default()
RETURNS event_id
LANGUAGE sql
VOLATILE STRICT PARALLEL SAFE SECURITY DEFINER
SET search_path TO pgmb
AS $$
  SELECT create_event_id(
    ts := clock_timestamp(),
    rand := create_random_bigint()
  )
$$;
77
+
78
-- Recovers the timestamp embedded in an event ID: characters 3..15 are read
-- as a hexadecimal count of microseconds since the epoch.
-- Returns NULL for a NULL id.
CREATE OR REPLACE FUNCTION extract_date_from_event_id(id event_id)
RETURNS TIMESTAMPTZ
LANGUAGE sql
IMMUTABLE PARALLEL SAFE SECURITY INVOKER
SET search_path TO pgmb
AS $$
  SELECT to_timestamp(hex.micros / 1000000)
  FROM (
    SELECT ('0x' || substr(id, 3, 13))::numeric AS micros
  ) AS hex
$$;
84
+
85
-- Identifier type for subscriptions: a string of at most 24 characters.
CREATE DOMAIN subscription_id AS CHARACTER VARYING(24);
86
+
87
-- Published events, range-partitioned on the (timestamp-prefixed) id.
CREATE TABLE IF NOT EXISTS events (
  id event_id PRIMARY KEY DEFAULT create_event_id_default(),
  topic VARCHAR(255) NOT NULL,
  payload JSONB NOT NULL,
  metadata JSONB,
  -- set only when the event is directed at one specific subscription;
  -- holds that subscription's ID
  subscription_id subscription_id
)
PARTITION BY RANGE (id);
96
+
97
-- IDs of events that have been inserted but not yet dispatched.
-- Rows are added by the insert trigger on "events" and removed by the
-- poll function, hence the aggressive autovacuum settings below.
CREATE UNLOGGED TABLE IF NOT EXISTS unread_events (
  event_id event_id PRIMARY KEY
)
WITH (
  -- tuned for high insert & delete rates
  autovacuum_vacuum_scale_factor  = 0.01,
  autovacuum_vacuum_threshold     = 5000,
  autovacuum_analyze_scale_factor = 0.005,
  autovacuum_analyze_threshold    = 1000,
  autovacuum_vacuum_cost_delay    = 0
);
107
+
108
+
109
-- Statement-level trigger function: copies the id of every row in the NEW
-- transition table into "unread_events". The poll function reads that table
-- and dispatches the events to subscriptions.
CREATE OR REPLACE FUNCTION mark_events_as_unread()
RETURNS TRIGGER
LANGUAGE plpgsql
VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb
AS $$
BEGIN
  INSERT INTO unread_events (event_id)
  SELECT inserted.id
  FROM NEW AS inserted;

  -- return value is ignored for AFTER ... FOR EACH STATEMENT triggers
  RETURN NULL;
END
$$;
121
+
122
-- Fire once per INSERT statement on "events", exposing the inserted rows
-- as the transition table NEW.
CREATE TRIGGER mark_events_as_unread_trigger
  AFTER INSERT
  ON events
  REFERENCING NEW TABLE AS NEW
  FOR EACH STATEMENT
  EXECUTE FUNCTION mark_events_as_unread();
127
+
128
-- Builds the name of the partition of table_id that starts at ts:
-- "<table>_<YYYYMMDDHH24MI>". The table part is the textual form of the
-- regclass and the time part is rendered by to_char(), so both follow the
-- caller's session settings (search_path and TimeZone respectively).
-- Returns NULL if either argument is NULL (STRICT).
CREATE OR REPLACE FUNCTION get_time_partition_name(
  table_id regclass,
  ts timestamptz
) RETURNS TEXT
LANGUAGE sql
IMMUTABLE STRICT PARALLEL SAFE
AS $$
  SELECT format('%s_%s', table_id, to_char(ts, 'YYYYMMDDHH24MI'))
$$;
134
+
135
-- Groups the partitions of table_id into runs of contiguous ranges.
-- Returns one row per run: the smallest lower bound, the largest upper
-- bound, and the OIDs of the partitions in that run.
-- Bounds are parsed from the textual partition bound expression
-- ("FOR VALUES FROM ('..') TO ('..')"); a partition whose bound does not
-- match that shape contributes NULL bounds.
CREATE OR REPLACE FUNCTION get_partitions_and_bounds(
  table_id regclass
) RETURNS TABLE(
  lower_bound event_id,
  upper_bound event_id,
  partition_ids oid[]
)
LANGUAGE sql
STABLE PARALLEL SAFE SECURITY INVOKER
SET search_path TO pgmb
AS $$
  WITH child_bounds AS (
    -- one row per child partition with its parsed FROM / TO bound
    SELECT
      pc.oid AS part_oid,
      m.parsed[1] AS from_bound,
      m.parsed[2] AS to_bound
    FROM pg_inherits AS inh
    INNER JOIN pg_class AS pc
      ON pc.oid = inh.inhrelid
    CROSS JOIN LATERAL (
      SELECT REGEXP_MATCH(
        pg_get_expr(pc.relpartbound, pc.oid),
        '^FOR VALUES FROM \(''(.*)''\) TO \(''(.*)''\)$'
      ) AS parsed
    ) AS m
    WHERE inh.inhparent = table_id
  ),
  -- gaps-and-islands, from: https://dba.stackexchange.com/a/101010
  -- "gap_start" is TRUE when the previous partition ends before this one
  -- begins, and NULL otherwise
  flagged AS (
    SELECT
      cb.*,
      CASE
        WHEN LAG(cb.to_bound) OVER (ORDER BY cb.from_bound) < cb.from_bound
          THEN TRUE
      END AS gap_start
    FROM child_bounds AS cb
  ),
  -- running count of gaps seen so far = island number
  islands AS (
    SELECT
      f.*,
      count(f.gap_start) OVER (ORDER BY f.from_bound) AS island_no
    FROM flagged AS f
  )
  SELECT
    MIN(i.from_bound),
    MAX(i.to_bound),
    array_agg(i.part_oid)
  FROM islands AS i
  GROUP BY i.island_no;
$$;
173
+
174
-- Partition maintenance for a table that is range-partitioned on an
-- event_id column.
--
-- Creates every missing partition between the current time bucket and
-- "future_interval" ahead of it, then drops partitions whose name sorts
-- before the name of the oldest bucket still inside "retention_period".
--
-- Parameters:
--   table_id           - the partitioned parent table
--   partition_interval - width of one partition (at least 1 minute)
--   future_interval    - how far ahead partitions are created
--                        (at least partition_interval)
--   retention_period   - partitions older than this are dropped
--   additional_sql     - optional statement executed after each partition is
--                        created; every '$1' in it is replaced with the new
--                        partition's name
--   current_ts         - reference "now" (defaults to NOW())
--
-- Concurrency: guarded by a transaction-level advisory lock keyed on the
-- table; if another session holds it, the call returns without doing anything.
CREATE OR REPLACE FUNCTION maintain_time_partitions_using_event_id(
  table_id regclass,
  partition_interval INTERVAL,
  future_interval INTERVAL,
  retention_period INTERVAL,
  additional_sql TEXT DEFAULT NULL,
  current_ts timestamptz DEFAULT NOW()
)
RETURNS void AS $$
DECLARE
  -- start of the bucket that contains current_ts
  ts_trunc timestamptz := date_bin(partition_interval, current_ts, '2000-1-1');
  oldest_pt_to_keep text := pgmb
    .get_time_partition_name(table_id, ts_trunc - retention_period);
  p_info RECORD;
  lock_key CONSTANT BIGINT :=
    hashtext('pgmb.maintain_tp.' || table_id::text);
  ranges_to_create tstzrange[];
  cur_range tstzrange;
BEGIN
  ASSERT partition_interval >= interval '1 minute',
    'partition_interval must be at least 1 minute';
  ASSERT future_interval >= partition_interval,
    'future_interval must be at least as large as partition_interval';

  IF NOT pg_try_advisory_xact_lock(lock_key) THEN
    -- another process is already maintaining partitions for this table
    RETURN;
  END IF;

  -- Work out which time ranges still need a partition.
  -- All existing partition ranges are first merged into ONE multirange and
  -- that union is subtracted from each future bucket. Subtracting per
  -- overlapping group (as a join would) can emit the same missing range
  -- twice when a bucket touches two separate groups of partitions.
  WITH existing_part_ranges AS (
    SELECT
      tstzrange(
        pgmb.extract_date_from_event_id(pb.lower_bound),
        pgmb.extract_date_from_event_id(pb.upper_bound),
        '[]'
      ) AS span
    FROM pgmb.get_partitions_and_bounds(table_id) AS pb
  ),
  covered AS (
    SELECT
      COALESCE(range_agg(epr.span), '{}'::tstzmultirange) AS spans
    FROM existing_part_ranges AS epr
  ),
  future_tzs AS (
    SELECT
      tstzrange(dt, dt + partition_interval, '[]') AS span
    FROM generate_series(
      ts_trunc,
      ts_trunc + future_interval,
      partition_interval
    ) AS gs(dt)
  )
  SELECT ARRAY_AGG(u.span)
  INTO ranges_to_create
  FROM future_tzs AS ftz
  CROSS JOIN covered AS c
  CROSS JOIN LATERAL unnest(ftz.span::tstzmultirange - c.spans) AS u(span);

  ranges_to_create := COALESCE(ranges_to_create, ARRAY[]::tstzrange[]);

  -- create one partition per missing range
  FOREACH cur_range IN ARRAY ranges_to_create LOOP
    DECLARE
      start_ev_id pgmb.event_id := pgmb.create_event_id(lower(cur_range), 0);
      end_ev_id pgmb.event_id := pgmb.create_event_id(upper(cur_range), 0);
      pt_name TEXT := pgmb.get_time_partition_name(table_id, lower(cur_range));
    BEGIN
      RAISE NOTICE 'creating partition "%". start: %, end: %',
        pt_name, lower(cur_range), upper(cur_range);

      -- table_id is a regclass: its text form is already quoted and
      -- schema-qualified as needed, so it is inserted with %s. Passing it
      -- through %I would quote it a second time.
      EXECUTE FORMAT(
        'CREATE TABLE %I PARTITION OF %s FOR VALUES FROM (%L) TO (%L)',
        pt_name, table_id, start_ev_id, end_ev_id
      );

      IF additional_sql IS NOT NULL THEN
        EXECUTE REPLACE(additional_sql, '$1', pt_name);
      END IF;
    END;
  END LOOP;

  -- Drop old partitions: names embed the bucket start, so a plain text
  -- comparison against the oldest name to keep selects the expired ones.
  FOR p_info IN (
    SELECT inhrelid::regclass AS child
    FROM pg_catalog.pg_inherits
    WHERE inhparent = table_id
      AND inhrelid::regclass::text < oldest_pt_to_keep
  ) LOOP
    -- regclass again: %s, not %I (see above)
    EXECUTE format('DROP TABLE %s', p_info.child);
  END LOOP;
END;
$$ LANGUAGE plpgsql VOLATILE PARALLEL UNSAFE SECURITY DEFINER;
274
+
275
-- Returns the partition of table_id that is current at current_ts, i.e. the
-- partition with the greatest name that is still <= the partition name
-- computed for current_ts. Returns NULL if there is no such partition.
--
-- The candidates are ordered by the same name the filter compares, not by
-- OID: OIDs only reflect creation order, which differs from time order when
-- a partition for an earlier range is created later.
CREATE OR REPLACE FUNCTION get_current_partition(
  table_id regclass,
  current_ts timestamptz DEFAULT NOW()
) RETURNS regclass AS $$
  SELECT inh.inhrelid::regclass
  FROM pg_catalog.pg_inherits AS inh
  WHERE inh.inhparent = table_id
    AND inh.inhrelid::regclass::text
      <= pgmb.get_time_partition_name(table_id, current_ts)
  ORDER BY inh.inhrelid::regclass::text DESC
  LIMIT 1
$$ LANGUAGE sql STABLE PARALLEL SAFE SECURITY DEFINER;
287
+
288
+ -- subscriptions table and related functions ----------------
289
+
290
-- Identifier type for subscription groups: a string of at most 48 characters.
CREATE DOMAIN group_id AS CHARACTER VARYING(48);
291
+
292
-- Generates a subscription ID: an event-style ID built from NOW() and a
-- random value, with its two-character prefix swapped for 'su'.
CREATE OR REPLACE FUNCTION create_subscription_id()
RETURNS subscription_id
LANGUAGE sql
VOLATILE STRICT PARALLEL SAFE SECURITY DEFINER
SET search_path TO pgmb
AS $$
  -- right(x, -2) keeps everything except the first two characters
  SELECT 'su' || right(
    create_event_id(NOW(), create_random_bigint()),
    -2
  );
$$;
300
+
301
+ -- subscription, groups tables and functions will go here ----------------
302
+
303
-- One row per subscription group, holding the group's read cursor.
CREATE TABLE subscription_groups (
  id group_id PRIMARY KEY,
  created_at TIMESTAMPTZ DEFAULT NOW(),
  -- cursor: defaults to an ID built from the creation time
  last_read_event_id event_id DEFAULT create_event_id(NOW(), 0)
);
308
+
309
CREATE TABLE subscriptions (
  -- unique identifier of the subscription
  id subscription_id PRIMARY KEY DEFAULT create_subscription_id(),

  -- the group this subscription belongs to; subscriptions of the same
  -- group are read together in one batch
  group_id group_id NOT NULL
    REFERENCES subscription_groups(id) ON DELETE RESTRICT,

  -- SQL expression used to filter events for this subscription. Inside it
  -- the events table is aliased "e" and the subscriptions table "s".
  -- Example: "e.topic = s.metadata->>'topic'",
  conditions_sql TEXT NOT NULL DEFAULT 'TRUE',

  -- Extra parameters for conditions_sql (indexed). Sharing one
  -- conditions_sql across many subscriptions and varying only params
  -- is the more efficient setup.
  params JSONB NOT NULL DEFAULT '{}'::jsonb,

  -- hash of (group, conditions, params); the UNIQUE constraint rejects a
  -- second subscription that hashes to the same value
  identity bigint GENERATED ALWAYS AS (
    hashtext(
      group_id
      || '/' || conditions_sql
      || '/' || jsonb_hash(params)::text
    )
  ) STORED UNIQUE,

  -- when this subscription was last active
  last_active_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

  -- lifetime counted from last_active_at; NULL means it never expires
  expiry_interval INTERVAL
);
340
+
341
-- Adds an interval to a timestamptz. Exists only so that the sum can be
-- used inside an index expression, which requires an IMMUTABLE function.
CREATE FUNCTION add_interval_imm(tstz TIMESTAMPTZ, itvl INTERVAL)
RETURNS TIMESTAMPTZ
LANGUAGE sql
IMMUTABLE PARALLEL SAFE
SET search_path TO pgmb
AS $$
  SELECT (tstz + itvl) AS shifted;
$$;
347
+
348
-- Index for finding expired subscriptions quickly. The expiry instant is
-- indexed as an expression instead of a stored generated "expires_at"
-- column, because the separate column ran into a deadlock issue.
CREATE INDEX
  ON subscriptions (
    group_id,
    add_interval_imm(last_active_at, expiry_interval)
  )
  WHERE expiry_interval IS NOT NULL;
355
+
356
-- Creates the "sub_gin" index on subscriptions.
-- With btree_gin available the index covers (conditions_sql, params);
-- otherwise it falls back to a plain GIN index on params only.
DO $$
BEGIN
  IF EXISTS (
    SELECT 1
    FROM pg_available_extensions AS ext
    WHERE ext.name = 'btree_gin'
  ) THEN
    CREATE EXTENSION IF NOT EXISTS btree_gin;
    -- fastupdate=false makes creating subscriptions slower, but keeps the
    -- costlier "poll_for_events" function fast.
    CREATE INDEX "sub_gin"
      ON subscriptions
      USING GIN (conditions_sql, params)
      WITH (fastupdate = false);
  ELSE
    RAISE NOTICE 'btree_gin extension is not available, using
regular GIN index for subscriptions.params';
    CREATE INDEX "sub_gin"
      ON subscriptions
      USING GIN (params)
      WITH (fastupdate = false);
  END IF;
END
$$;
383
+
384
-- Materialized view of the distinct conditions_sql statements in use.
-- A change in its contents is the signal to re-prepare the
-- "poll_for_events" function.
CREATE MATERIALIZED VIEW IF NOT EXISTS subscription_cond_sqls AS
  SELECT DISTINCT s.conditions_sql
  FROM subscriptions AS s
  ORDER BY s.conditions_sql;

-- A unique index is what allows the view to be refreshed CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS subscription_cond_sqls_idx
  ON subscription_cond_sqls (conditions_sql);
394
+
395
-- Events dispatched to each subscription, per group. Insert-only, like
-- "events". Groups advance their cursor as they read. Cursor movement is
-- safe because a single writer (poll_for_events) writes here while many
-- readers read.
CREATE TABLE IF NOT EXISTS subscription_events (
  id event_id,
  group_id group_id,
  event_id event_id,
  subscription_id subscription_id
)
PARTITION BY RANGE (id);

-- Supports reading a group's rows in id order.
CREATE INDEX IF NOT EXISTS subscription_events_group_idx
  ON subscription_events (group_id, id);
409
+
410
-- Role with minimal access to the database. pgmb runs user-supplied SQL
-- fragments, so that SQL is executed under this role to keep an
-- accidentally bad or malicious fragment from reaching anything else.
DO $$
DECLARE
  role_exists BOOLEAN;
BEGIN
  SELECT EXISTS (
    SELECT 1 FROM pg_roles WHERE rolname = 'pgmb_reader'
  ) INTO role_exists;

  IF role_exists THEN
    RETURN;
  END IF;

  CREATE ROLE pgmb_reader NOLOGIN NOSUPERUSER NOCREATEDB
    NOCREATEROLE NOINHERIT NOREPLICATION;
END
$$;
423
+
424
-- Schema access for pgmb_reader (USAGE plus CREATE).
GRANT USAGE, CREATE ON SCHEMA pgmb TO pgmb_reader;
-- Read-only access to the tables it filters and joins on.
GRANT SELECT ON TABLE events, config, subscriptions TO pgmb_reader;
-- The unread queue is read, locked and emptied.
GRANT SELECT, UPDATE, DELETE ON TABLE unread_events TO pgmb_reader;
-- "subscription_events": INSERT and UPDATE are granted, but not SELECT or DELETE.
GRANT UPDATE, INSERT ON TABLE subscription_events TO pgmb_reader;
432
+
433
-- The function is created while acting as pgmb_reader, so that role owns it
-- and, being SECURITY DEFINER, the user-supplied SQL runs as that role.
SET ROLE pgmb_reader;

-- Row trigger function: checks NEW.conditions_sql by executing it as the
-- join condition between an empty set of events ("e") and a single blank
-- subscription ("s"). An invalid expression makes the statement fail and
-- with it the INSERT/UPDATE.
CREATE OR REPLACE FUNCTION validate_subscription_conditions_sql()
RETURNS TRIGGER
LANGUAGE plpgsql
STABLE PARALLEL SAFE
SECURITY DEFINER
SET search_path TO pgmb
AS $$
DECLARE
  probe_sql TEXT;
BEGIN
  probe_sql := 'SELECT * FROM jsonb_populate_recordset(NULL::pgmb.events, ''[]'') e
INNER JOIN jsonb_populate_recordset(NULL::pgmb.subscriptions, ''[{}]'') s
ON ' || NEW.conditions_sql;

  EXECUTE probe_sql;

  RETURN NEW;
END;
$$;

RESET ROLE;
447
+
448
-- Validate conditions_sql on every inserted or updated subscription row.
CREATE TRIGGER validate_subscription_conditions_sql_trigger
  BEFORE INSERT OR UPDATE
  ON subscriptions
  FOR EACH ROW
  EXECUTE FUNCTION validate_subscription_conditions_sql();
452
+
453
-- Template for the "poll_for_events" function. Whenever the set of
-- subscription conditions changes, a new "poll_for_events" is generated
-- from this function's source with the placeholder line replaced by the
-- combined "conditions_sql" statements.
-- Keeping it as a real function gives syntax highlighting and checking
-- while editing.
--
-- What it does: takes up to "poll_chunk_size" unread event IDs off
-- "unread_events", matches those events against subscriptions and writes
-- one "subscription_events" row per match. Returns the number of rows
-- written, or 0 if another session is polling or nothing was unread.
--
-- The local variable names and the aliases "e" / "s" are part of the
-- environment the injected SQL runs in; keep them stable.
CREATE OR REPLACE FUNCTION poll_for_events_tmpl()
RETURNS INT
LANGUAGE plpgsql
VOLATILE STRICT PARALLEL UNSAFE
SECURITY DEFINER
SET search_path TO pgmb
AS $body$
DECLARE
  lock_key CONSTANT BIGINT :=
    hashtext('pgmb.poll_for_events');

  chunk_size INT := get_config_value('poll_chunk_size')::INT;
  start_num BIGINT := create_random_bigint();
  write_start TIMESTAMPTZ;

  read_ids event_id[];
  min_id event_id;
  max_id event_id;

  inserted_rows integer;
BEGIN
  -- single poller at a time; the lock is released at transaction end
  IF NOT pg_try_advisory_xact_lock(lock_key) THEN
    RETURN 0;
  END IF;

  -- claim a chunk of unread IDs (skipping rows locked by others),
  -- delete them, and remember the claimed IDs and their min/max
  WITH to_delete AS (
    SELECT ue.event_id
    FROM unread_events AS ue
    WHERE ue.event_id < create_event_id(NOW(), 0)
    LIMIT chunk_size
    FOR UPDATE SKIP LOCKED
  ),
  deleted AS (
    DELETE FROM unread_events AS ue
    USING to_delete AS td
    WHERE ue.event_id = td.event_id
  )
  SELECT
    MAX(td.event_id),
    MIN(td.event_id),
    ARRAY_AGG(td.event_id)
  INTO max_id, min_id, read_ids
  FROM to_delete AS td;

  -- nothing was claimed
  IF max_id IS NULL THEN
    RETURN 0;
  END IF;

  write_start := clock_timestamp();

  -- The explicit min/max range on e.id lets the planner prune partitions.
  -- Every row written gets an ID made of the shared write timestamp and
  -- an incrementing number.
  WITH read_events AS (
    SELECT e.*
    FROM events e
    INNER JOIN unnest(read_ids) r(id) ON e.id = r.id
    WHERE e.id >= min_id AND e.id <= max_id
  )
  INSERT INTO subscription_events(id, group_id, subscription_id, event_id)
  SELECT
    create_event_id(write_start, start_num + row_number() OVER ()),
    s.group_id,
    s.id,
    e.id
  FROM read_events e
  INNER JOIN subscriptions s ON
    s.id = e.subscription_id
    OR (
      e.subscription_id IS NULL
      AND (
        -- Do not edit this line directly. Will be replaced
        -- in the prepared function.
        TRUE -- CONDITIONS_SQL_PLACEHOLDER --
      )
    )
  ON CONFLICT DO NOTHING;

  -- total rows written by the INSERT above
  GET DIAGNOSTICS inserted_rows = ROW_COUNT;
  RETURN inserted_rows;
END;
$body$;
538
+
539
-- (Re)creates the "poll_for_events" function from the source of
-- "poll_for_events_tmpl", substituting the placeholder line with the
-- combined subscription conditions.
--
-- For statements {A, B} the injected condition is:
--   ((A) AND s.conditions_sql = 'A') OR ((B) AND s.conditions_sql = 'B')
-- An empty list yields a condition that never matches.
--
-- Parameters:
--   sql_statements - the distinct conditions_sql expressions currently in
--                    use (NULL elements are ignored)
-- Raises an exception if the template function cannot be found.
CREATE OR REPLACE FUNCTION prepare_poll_for_events_fn(
  sql_statements TEXT[]
) RETURNS VOID AS $$
DECLARE
  tmpl_proc_name constant TEXT :=
    'poll_for_events_tmpl';
  tmpl_proc_placeholder constant TEXT :=
    'TRUE -- CONDITIONS_SQL_PLACEHOLDER --';
  condition_sql TEXT;
  proc_src TEXT;
BEGIN
  sql_statements := array_remove(sql_statements, NULL);
  IF cardinality(sql_statements) = 0 THEN
    -- no subscriptions, so just use 'FALSE' to avoid any matches
    sql_statements := ARRAY['FALSE'];
  END IF;

  -- Build the condition SQL. Each clause is produced by its own format()
  -- call with the statement passed as an ARGUMENT (%s / %L). The statement
  -- must never be part of the format string itself: any '%' in it
  -- (e.g. LIKE 'abc%') would be parsed as a format specifier.
  SELECT
    '('
    || string_agg(
      format('(%s) AND s.conditions_sql = %L', arr.stmt, arr.stmt),
      ') OR ('
      ORDER BY arr.ord
    )
    || ')'
  INTO condition_sql
  FROM unnest(sql_statements) WITH ORDINALITY AS arr(stmt, ord);

  condition_sql := FORMAT('/* updated at %s */', NOW()) || condition_sql;

  -- fetch the source of the template procedure
  select pg_get_functiondef(oid) INTO proc_src
  from pg_proc where proname = tmpl_proc_name and
    pronamespace = 'pgmb'::regnamespace;
  IF proc_src IS NULL THEN
    RAISE EXCEPTION 'Template procedure % not found', tmpl_proc_name;
  END IF;

  -- replace the placeholder with the actual condition SQL, and rename
  -- the function from the template name to the real one
  proc_src := REPLACE(proc_src, tmpl_proc_placeholder, condition_sql);
  proc_src := REPLACE(proc_src, tmpl_proc_name, 'poll_for_events');

  -- the new poll_for_events function will be created with
  -- the pgmb_reader role, to avoid a bad "conditions_sql"
  -- from having any destructive access to the database.
  SET ROLE pgmb_reader;
  EXECUTE proc_src;
  RESET ROLE;
END;
$$ LANGUAGE plpgsql VOLATILE STRICT PARALLEL UNSAFE
SET search_path TO pgmb
SECURITY INVOKER;
592
+
593
-- Create the initial "poll_for_events" for the no-subscriptions case.
SELECT prepare_poll_for_events_fn(ARRAY[]::text[]);
594
+
595
-- Statement trigger function for "subscriptions": refreshes the
-- "subscription_cond_sqls" view and, only if the set of distinct
-- conditions_sql statements changed, regenerates "poll_for_events".
CREATE OR REPLACE FUNCTION refresh_subscription_read_statements()
RETURNS TRIGGER
LANGUAGE plpgsql
VOLATILE PARALLEL UNSAFE
SECURITY INVOKER
SET search_path TO pgmb
AS $$
DECLARE
  stmts_before TEXT[];
  stmts_after TEXT[];
BEGIN
  -- snapshot of the statements known before this change
  stmts_before := ARRAY(
    SELECT v.conditions_sql
    FROM subscription_cond_sqls AS v
    ORDER BY v.conditions_sql
  );

  REFRESH MATERIALIZED VIEW CONCURRENTLY subscription_cond_sqls;

  stmts_after := ARRAY(
    SELECT v.conditions_sql
    FROM subscription_cond_sqls AS v
    ORDER BY v.conditions_sql
  );

  -- re-prepare poll_for_events only when the statement set differs
  IF stmts_after <> stmts_before THEN
    PERFORM prepare_poll_for_events_fn(stmts_after);
  END IF;

  RETURN NULL;
END
$$;
628
+
629
-- Run the refresh once per statement that touches "subscriptions".
CREATE TRIGGER refresh_subscription_read_statements_trigger
  AFTER INSERT OR UPDATE OR DELETE
  ON subscriptions
  FOR EACH STATEMENT
  EXECUTE FUNCTION refresh_subscription_read_statements();
633
+
634
-- Utility fn to read events by their IDs, ordered by ID.
-- Exists because postgres doesn't prune partitions well for an IN/JOIN on
-- a partitioned table; the explicit min/max range below lets it.
--
-- Parameters:
--   event_ids - IDs of the events to fetch
-- Returns the matching rows of "events"; nothing for an empty array, and
-- nothing for NULL (STRICT).
CREATE OR REPLACE FUNCTION read_events(
  event_ids event_id[]
) RETURNS SETOF events AS $$
DECLARE
  max_id event_id;
  min_id event_id;
BEGIN
  -- cardinality() is 0 for an empty array; array_length(arr, 1) would be
  -- NULL there and the guard would never fire
  IF cardinality(event_ids) = 0 THEN
    RETURN;
  END IF;

  -- get min and max ids, allows PG to correctly prune partitions
  SELECT
    MAX(eid),
    MIN(eid)
  INTO max_id, min_id
  FROM unnest(event_ids) AS u(eid);

  RETURN QUERY
  SELECT e.*
  FROM events e
  INNER JOIN unnest(event_ids) AS u(eid) ON e.id = u.eid
  WHERE e.id <= max_id AND e.id >= min_id
  ORDER BY u.eid;
END;
$$ LANGUAGE plpgsql STRICT STABLE PARALLEL SAFE
SET search_path TO pgmb;
664
+
665
-- Reads the next batch of events for a subscription group.
--
-- Parameters:
--   gid        - the group to read for
--   cursor     - read rows with an ID greater than this; when NULL the
--                group's stored last_read_event_id is used
--   chunk_size - max number of "subscription_events" rows to consume
--                (defaults to the 'poll_chunk_size' setting)
--   peek       - when TRUE the per-group advisory lock is neither required
--                nor taken. Useful to debug without blocking other readers.
--
-- Returns one row per event with the IDs of the subscriptions it matched
-- and "next_cursor", the greatest "subscription_events" ID consumed by this
-- call (same value on every row).
--
-- Locking: unless peeking, a SESSION-level advisory lock for the group is
-- acquired and stays held after the call; release it with
-- release_group_lock() or set_group_cursor(..., release_lock := true).
-- If the lock is held by another session, nothing is returned.
CREATE OR REPLACE FUNCTION read_next_events(
  gid VARCHAR(48),
  cursor event_id DEFAULT NULL,
  chunk_size INT DEFAULT get_config_value('poll_chunk_size')::INT,
  -- if peek is true, we do not require having to acquire the advisory
  -- lock to read events. Useful to debug without blocking other readers.
  peek BOOLEAN DEFAULT FALSE
) RETURNS TABLE(
  id event_id,
  topic VARCHAR(255),
  payload JSONB,
  metadata JSONB,
  subscription_ids subscription_id[],
  next_cursor event_id
) AS $$
DECLARE
  lock_key CONSTANT BIGINT :=
    hashtext('pgmb.read_next_events.' || gid);
BEGIN
  -- provide a lock for the group, so that if we temporarily
  -- or accidentally have multiple readers for the same group,
  -- they don't interfere with each other.
  -- The lock is only attempted when not peeking: a peek that took the
  -- lock would itself block the real readers.
  IF NOT peek THEN
    IF NOT pg_try_advisory_lock(lock_key) THEN
      RETURN;
    END IF;
  END IF;

  -- fetch the cursor to read from
  -- if no cursor is provided, fetch from the group's last read event id
  IF cursor IS NULL THEN
    SELECT sc.last_read_event_id
    FROM subscription_groups sc
    WHERE sc.id = gid
    INTO cursor;
  END IF;
  -- if still null, don't return anything
  IF cursor IS NULL THEN
    RETURN;
  END IF;

  RETURN QUERY WITH next_events AS (
    SELECT
      se.id,
      se.event_id,
      se.subscription_id
    FROM subscription_events se
    INNER JOIN subscriptions s ON s.id = se.subscription_id
    WHERE se.group_id = gid
      AND se.id < create_event_id(NOW(), 0)
      AND se.id > cursor
    -- Take the rows closest to the cursor. next_cursor is the MAX id of
    -- this set, so an arbitrary subset could leave unread rows behind it.
    ORDER BY se.id
    LIMIT chunk_size
  ),
  next_events_grp AS (
    SELECT
      ne.event_id,
      ARRAY_AGG(ne.subscription_id) AS subscription_ids
    FROM next_events ne
    GROUP BY ne.event_id
    ORDER BY ne.event_id
  )
  SELECT
    e.id,
    e.topic,
    e.payload,
    e.metadata,
    ne.subscription_ids,
    (SELECT MAX(ne2.id)::event_id FROM next_events ne2)
  FROM read_events(ARRAY(SELECT ne.event_id FROM next_events_grp ne)) e
  INNER JOIN next_events_grp ne ON ne.event_id = e.id;
END
$$ LANGUAGE plpgsql STABLE PARALLEL SAFE
SET search_path TO pgmb
SECURITY INVOKER;
737
+
738
-- Returns the events already dispatched to one subscription after a given
-- event ID, for replaying them.
--
-- Parameters:
--   gid           - the subscription's group
--   sid           - the subscription
--   from_event_id - only events with an ID greater than this are replayed
--   max_events    - upper limit on the number of events
-- Raises an exception when more than max_events events qualify.
CREATE OR REPLACE FUNCTION replay_events(
  gid VARCHAR(48),
  sid VARCHAR(24),
  from_event_id event_id,
  max_events INT
) RETURNS SETOF events AS $$
DECLARE
  event_ids event_id[];
  now_id event_id := create_event_id(NOW(), 0);
BEGIN
  -- The LIMIT sits in a subquery so that it caps the rows BEFORE they are
  -- aggregated. Placed after ARRAY_AGG it would only apply to the single
  -- aggregated row and every matching ID would be collected.
  -- One extra row is fetched to detect "more than max_events".
  SELECT ARRAY_AGG(picked.event_id) INTO event_ids
  FROM (
    SELECT se.event_id
    FROM subscription_events se
    WHERE se.group_id = gid
      AND se.subscription_id = sid
      AND se.event_id > from_event_id
      AND se.event_id <= now_id
      -- we filter "id" by the same range too, because
      -- 1. the format of se.id and e.id are the same. And rows are
      -- inserted into the se table after the corresponding e row is created,
      -- so if we find rows > from_event_id in se.event_id, the corresponding
      -- e.id will also be > from_event_id
      -- 2. it helps postgres prune which partitions to read from
      AND se.id <= now_id
      AND se.id > from_event_id
    LIMIT (max_events + 1)
  ) AS picked;

  IF cardinality(event_ids) > max_events THEN
    RAISE EXCEPTION
      'Too many events to replay. Please replay in smaller batches.';
  END IF;

  -- read_events is STRICT: a NULL array (no matches) yields no rows
  RETURN QUERY SELECT * FROM read_events(event_ids);
END $$ LANGUAGE plpgsql STABLE PARALLEL SAFE
SET search_path TO pgmb
SECURITY INVOKER;
772
+
773
-- Releases the session-level advisory lock of a group, i.e. the lock keyed
-- on 'pgmb.read_next_events.' || gid. The result of the unlock call is
-- discarded.
CREATE OR REPLACE FUNCTION release_group_lock(gid VARCHAR(48))
RETURNS VOID
LANGUAGE plpgsql
VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb
AS $$
BEGIN
  PERFORM pg_advisory_unlock(
    hashtext('pgmb.read_next_events.' || gid)::BIGINT
  );
END
$$;
783
+
784
-- Stores a group's read cursor, creating the group row if it is missing.
--
-- Parameters:
--   gid          - the group
--   new_cursor   - value to store as last_read_event_id
--   release_lock - when true, also release the group's advisory lock
CREATE OR REPLACE FUNCTION set_group_cursor(
  gid VARCHAR(48),
  new_cursor event_id,
  -- if true, release any existing lock for this group
  release_lock BOOLEAN
) RETURNS VOID
LANGUAGE plpgsql
VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb
AS $$
BEGIN
  INSERT INTO subscription_groups AS g (id, last_read_event_id)
  VALUES (gid, new_cursor)
  ON CONFLICT (id) DO UPDATE
    SET last_read_event_id = EXCLUDED.last_read_event_id;

  IF release_lock THEN
    PERFORM release_group_lock(gid);
  END IF;
END
$$;
805
+
806
-- Runs partition maintenance for "events" and then "subscription_events",
-- using the partition_interval, future_intervals_to_create and
-- partition_retention_period settings from "config".
--
-- Parameters:
--   current_ts - reference "now" handed to the maintenance function
CREATE OR REPLACE FUNCTION maintain_events_table(
  current_ts timestamptz DEFAULT NOW()
)
RETURNS VOID
LANGUAGE plpgsql
VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb
AS $$
DECLARE
  part_width INTERVAL := get_config_value('partition_interval');
  look_ahead INTERVAL := get_config_value('future_intervals_to_create');
  keep_for INTERVAL := get_config_value('partition_retention_period');
  -- Applied to every new partition: rows are only ever inserted, never
  -- updated or deleted, so autovacuum is switched off and fillfactor is
  -- set to 100.
  storage_sql CONSTANT TEXT := 'ALTER TABLE $1 SET(
fillfactor = 100,
autovacuum_enabled = false,
toast.autovacuum_enabled = false
);';
  target regclass;
BEGIN
  FOREACH target IN ARRAY ARRAY[
    'pgmb.events'::regclass,
    'pgmb.subscription_events'::regclass
  ] LOOP
    PERFORM maintain_time_partitions_using_event_id(
      target,
      partition_interval := part_width,
      future_interval := look_ahead,
      retention_period := keep_for,
      additional_sql := storage_sql,
      current_ts := current_ts
    );
  END LOOP;
END;
$$;
849
+
850
-- Create the initial partitions at install time.
SELECT maintain_events_table(current_ts := NOW());
852
+
853
+ -- triggers to add events for specific tables ---------------------------
854
+
855
-- Builds the topic string for a table mutation: the three parts joined
-- with '.' and lower-cased.
-- Eg. "public" "contacts" "INSERT" -> "public.contacts.insert"
-- Returns NULL if any argument is NULL (STRICT).
CREATE OR REPLACE FUNCTION create_topic(
  schema_name name,
  table_name name,
  kind varchar(16)
) RETURNS varchar(255)
LANGUAGE sql
IMMUTABLE STRICT PARALLEL SAFE
AS $$
  SELECT lower(
    schema_name
    || '.' || table_name
    || '.' || kind
  )
$$;
864
+
865
-- Computes the difference between two JSONB objects: the key/value pairs
-- of "a" that do not appear, with the same value, in "b".
-- 'null' values and non-existent keys are treated as equal.
-- Eg. jsonb_diff('{"a": 1, "b": 2, "c": null}', '{"a": 1, "b": null}') = '{"b": 2}'
--
-- When there is no difference the result is the empty object '{}' (the
-- aggregate alone would yield NULL over zero rows).
-- Returns NULL only if an argument is NULL (STRICT).
CREATE OR REPLACE FUNCTION jsonb_diff(a jsonb, b jsonb)
RETURNS jsonb AS $$
  SELECT COALESCE(jsonb_object_agg(d.key, d.value), '{}'::jsonb)
  FROM (
    SELECT key, value FROM jsonb_each(a) WHERE value != 'null'::jsonb
    EXCEPT
    SELECT key, value FROM jsonb_each(b) WHERE value != 'null'::jsonb
  ) AS d
$$ LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE;
876
+
877
-- Serialises a table row for an event and says whether the event should
-- be emitted at all.
--
-- Parameters:
--   tabl   - OID of the table the row belongs to (not used by this
--            default implementation)
--   op     - the operation name (not used by this default implementation)
--   record - the row to serialise
-- Outputs:
--   serialised - the row as JSONB; set regardless of "emit"
--   emit       - whether to emit the event; always TRUE here
CREATE OR REPLACE FUNCTION serialise_record_for_event(
  tabl oid,
  op TEXT,
  record RECORD,
  serialised OUT JSONB,
  emit OUT BOOLEAN
)
LANGUAGE plpgsql
IMMUTABLE STRICT PARALLEL SAFE
SECURITY INVOKER
AS $$
BEGIN
  SELECT to_jsonb(record), TRUE
  INTO serialised, emit;
END
$$;
896
+
897
-- Statement-level trigger function that pushes table changes to "events".
--
-- One event is written per affected row for which
-- serialise_record_for_event() says "emit":
--   INSERT - payload is the new row
--   DELETE - payload is the old row
--   UPDATE - payload is jsonb_diff(new, old); metadata.old is the old row.
--            Rows whose serialised form did not change are skipped.
-- JSON nulls are stripped from every payload. The topic is
-- "<schema>.<table>.<operation>" in lower case.
--
-- Expects the transition tables to be named NEW and/or OLD.
CREATE OR REPLACE FUNCTION push_table_event()
RETURNS TRIGGER AS $$
DECLARE
  -- all rows of one statement share this base; row numbers are added to it
  start_num BIGINT = create_random_bigint();
BEGIN
  IF TG_OP = 'INSERT' THEN
    INSERT INTO events(id, topic, payload)
    SELECT
      create_event_id(clock_timestamp(), rand := start_num + row_number() OVER ()),
      create_topic(TG_TABLE_SCHEMA, TG_TABLE_NAME, TG_OP),
      jsonb_strip_nulls(s.data)
    FROM NEW n
    CROSS JOIN LATERAL
      serialise_record_for_event(TG_RELID, TG_OP, n) AS s(data, emit)
    WHERE s.emit;
  ELSIF TG_OP = 'DELETE' THEN
    INSERT INTO events(id, topic, payload)
    SELECT
      create_event_id(clock_timestamp(), rand := start_num + row_number() OVER ()),
      create_topic(TG_TABLE_SCHEMA, TG_TABLE_NAME, TG_OP),
      jsonb_strip_nulls(s.data)
    FROM OLD o
    CROSS JOIN LATERAL
      serialise_record_for_event(TG_RELID, TG_OP, o) AS s(data, emit)
    WHERE s.emit;
  ELSIF TG_OP = 'UPDATE' THEN
    -- For updates, we can send both old and new data.
    -- NEW and OLD rows are paired by their position in the transition
    -- tables.
    INSERT INTO events(id, topic, payload, metadata)
    SELECT
      create_event_id(clock_timestamp(), rand := start_num + n.rn),
      create_topic(TG_TABLE_SCHEMA, TG_TABLE_NAME, TG_OP),
      -- An update that only sets columns to NULL has no non-null
      -- difference. Fall back to '{}' so "payload" (NOT NULL) never
      -- receives NULL, which would fail the UPDATE that fired the trigger.
      jsonb_strip_nulls(COALESCE(jsonb_diff(n.data, o.data), '{}'::jsonb)),
      jsonb_build_object('old', jsonb_strip_nulls(o.data))
    FROM (
      SELECT s.data, s.emit, row_number() OVER () AS rn
      FROM NEW n
      CROSS JOIN LATERAL
        serialise_record_for_event(TG_RELID, TG_OP, n) AS s(data, emit)
    ) AS n
    INNER JOIN (
      SELECT s.data, row_number() OVER () AS rn FROM OLD o
      CROSS JOIN LATERAL
        serialise_record_for_event(TG_RELID, TG_OP, o) AS s(data, emit)
    ) AS o ON n.rn = o.rn
    -- ignore rows where data didn't change
    WHERE n.data IS DISTINCT FROM o.data AND n.emit;
  END IF;

  RETURN NULL;
END
$$ LANGUAGE plpgsql SECURITY DEFINER VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb;
950
+
951
-- Makes a table subscribable: creates the statement-level triggers that
-- push its changes to the events table.
--
-- Parameters:
--   tbl    - the table to watch
--   insert - create the "post_insert_event" trigger
--   delete - create the "post_delete_event" trigger
--   update - create the "post_update_event" trigger
-- A trigger that already exists is left untouched.
CREATE OR REPLACE FUNCTION push_table_mutations(
  tbl regclass,
  insert BOOLEAN DEFAULT TRUE,
  delete BOOLEAN DEFAULT TRUE,
  update BOOLEAN DEFAULT TRUE
)
RETURNS VOID
LANGUAGE plpgsql
SECURITY DEFINER
VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb
AS $$
BEGIN
  -- tbl is a regclass, so %s inserts its already-quoted name
  IF insert THEN
    BEGIN
      EXECUTE format(
        'CREATE TRIGGER post_insert_event'
        ' AFTER INSERT ON %s'
        ' REFERENCING NEW TABLE AS NEW'
        ' FOR EACH STATEMENT'
        ' EXECUTE FUNCTION push_table_event();',
        tbl
      );
    EXCEPTION WHEN duplicate_object THEN
      NULL; -- trigger already present
    END;
  END IF;

  IF delete THEN
    BEGIN
      EXECUTE format(
        'CREATE TRIGGER post_delete_event'
        ' AFTER DELETE ON %s'
        ' REFERENCING OLD TABLE AS OLD'
        ' FOR EACH STATEMENT'
        ' EXECUTE FUNCTION push_table_event();',
        tbl
      );
    EXCEPTION WHEN duplicate_object THEN
      NULL; -- trigger already present
    END;
  END IF;

  IF update THEN
    BEGIN
      EXECUTE format(
        'CREATE TRIGGER post_update_event'
        ' AFTER UPDATE ON %s'
        ' REFERENCING OLD TABLE AS OLD NEW TABLE AS NEW'
        ' FOR EACH STATEMENT'
        ' EXECUTE FUNCTION push_table_event();',
        tbl
      );
    EXCEPTION WHEN duplicate_object THEN
      NULL; -- trigger already present
    END;
  END IF;
END
$$;
1005
+
1006
-- Stops a table from being subscribable by dropping (if present) the three
-- triggers that push its changes to the events table.
CREATE OR REPLACE FUNCTION stop_table_mutations_push(
  tbl regclass
) RETURNS VOID
LANGUAGE plpgsql
SECURITY DEFINER VOLATILE
SET search_path TO pgmb
AS $$
DECLARE
  trg_name TEXT;
BEGIN
  FOREACH trg_name IN ARRAY ARRAY[
    'post_insert_event',
    'post_delete_event',
    'post_update_event'
  ] LOOP
    EXECUTE format('DROP TRIGGER IF EXISTS %I ON %s;', trg_name, tbl);
  END LOOP;
END
$$;