@haathie/pgmb 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/sql/pgmb.sql ADDED
@@ -0,0 +1,1030 @@
1
+ /*
2
+ to explain inner fns: https://stackoverflow.com/a/30547418
3
+
4
+ -- Enable auto_explain for debugging
5
+ LOAD 'auto_explain';
6
+ SET auto_explain.log_nested_statements = 'on';
7
+ SET auto_explain.log_min_duration = 0;
8
+ SET client_min_messages TO log;
9
+ */
10
+
11
+ CREATE SCHEMA IF NOT EXISTS "pgmb";
12
+
13
+ SET search_path TO pgmb;
14
+
15
-- configuration for pgmb ----------------

-- Keys accepted by the "config" table.
CREATE TYPE config_type AS ENUM (
	'plugin_version',
	-- how long old partitions are retained; partitions older than
	-- this interval are deleted
	'partition_retention_period',
	-- how far into the future partitions are pre-created
	'future_intervals_to_create',
	'partition_interval',
	'poll_chunk_size'
);

-- One row per setting. Values are stored as text and cast by whoever
-- reads them (e.g. to INT or INTERVAL).
CREATE TABLE IF NOT EXISTS config (
	-- which setting this row holds
	id config_type PRIMARY KEY,
	value TEXT
);
33
+
34
-- Looks up the text value stored in "config" for the given key.
-- Yields NULL when the key has no row, or (being STRICT) when
-- config_id itself is NULL.
CREATE OR REPLACE FUNCTION get_config_value(
	config_id config_type
) RETURNS TEXT AS $$
	SELECT c.value
	FROM config AS c
	WHERE c.id = config_id
$$ LANGUAGE sql STRICT STABLE PARALLEL SAFE SET SEARCH_PATH TO pgmb;
39
+
40
-- Seed the default settings.
INSERT INTO config (id, value)
VALUES
	('plugin_version', '0.2.0'),
	-- partitions are kept for an hour ...
	('partition_retention_period', '60 minutes'),
	-- ... pre-created two hours ahead ...
	('future_intervals_to_create', '120 minutes'),
	-- ... and each one spans half an hour
	('partition_interval', '30 minutes'),
	-- max rows handled per poll/read
	('poll_chunk_size', '10000');
47
+
48
-- events table & its functions ---------------

-- Event IDs are strings of up to 24 characters (see create_event_id).
CREATE DOMAIN event_id AS VARCHAR(24);
51
+
52
-- Returns a random non-negative BIGINT no larger than 0xffffff8.
-- Callers use it as the starting "rand" component of an event ID and,
-- for batch inserts, add a per-row offset on top of it.
CREATE OR REPLACE FUNCTION create_random_bigint()
RETURNS BIGINT AS $$
DECLARE
	-- 0xffffff8 = 268435448 = 2^28 - 8, i.e. the result itself always
	-- fits in 7 hex digits.
	max_value CONSTANT BIGINT := 0xffffff8;
BEGIN
	RETURN (random() * max_value)::BIGINT;
END
$$ LANGUAGE plpgsql VOLATILE PARALLEL SAFE;
63
+
64
-- Creates a timestamped event ID: a 24-character string made of
--   1. the 'pm' prefix (2 chars)
--   2. "ts" as microseconds since the epoch, 13 zero-padded hex chars
--   3. "rand" as 9 zero-padded hex chars
-- Returns NULL if either argument is NULL (STRICT).
--
-- The random part is LEFT-padded so that, for a fixed timestamp, IDs
-- compare in the same order as their "rand" values and distinct values
-- below 16^9 never collide. Right-padding would map e.g. 0x1 and 0x10
-- to the same suffix and break ordering whenever the number of hex
-- digits changes inside a batch (start + row_number()).
-- rand = 0 still yields the all-zero suffix used for range bounds.
CREATE OR REPLACE FUNCTION create_event_id(ts timestamptz, rand bigint)
RETURNS event_id AS $$
	SELECT substr(
		'pm'
		-- 13 hex characters for the microsecond timestamp
		|| lpad(to_hex((extract(epoch from ts) * 1000000)::bigint), 13, '0')
		-- remaining 9 characters carry the random/sequence component
		|| lpad(to_hex(rand), 9, '0'),
		1,
		24
	)
$$ LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE SECURITY DEFINER
SET search_path TO pgmb;
83
+
84
-- Default generator for event IDs: wall-clock time (clock_timestamp,
-- so it advances within a transaction) plus a fresh random component.
CREATE OR REPLACE FUNCTION create_event_id_default()
RETURNS event_id AS $$
	SELECT create_event_id(
		ts := clock_timestamp(),
		rand := create_random_bigint()
	)
$$ LANGUAGE sql VOLATILE STRICT PARALLEL SAFE SECURITY DEFINER
SET search_path TO pgmb;
89
+
90
-- Recovers the timestamp encoded in an event ID: characters 3..15 are
-- the hex microseconds-since-epoch written by create_event_id.
CREATE OR REPLACE FUNCTION extract_date_from_event_id(id event_id)
RETURNS TIMESTAMPTZ AS $$
	SELECT to_timestamp(
		('0x' || substring(id FROM 3 FOR 13))::numeric / 1000000
	)
$$ LANGUAGE sql IMMUTABLE PARALLEL SAFE SECURITY INVOKER
SET search_path TO pgmb;
96
+
97
-- Subscription IDs share the 24-character shape of event IDs.
CREATE DOMAIN subscription_id AS VARCHAR(24);

-- The event log, range-partitioned on the (time-prefixed) event ID.
CREATE TABLE IF NOT EXISTS events (
	id event_id PRIMARY KEY DEFAULT create_event_id_default(),
	topic VARCHAR(255) NOT NULL,
	payload JSONB NOT NULL,
	metadata JSONB,
	-- set only when the event is directed at one specific subscription
	subscription_id subscription_id
) PARTITION BY RANGE (id);

-- IDs of events that have not been dispatched to subscriptions yet.
CREATE UNLOGGED TABLE IF NOT EXISTS unread_events (
	event_id event_id PRIMARY KEY
) WITH (
	-- autovacuum tuned for high insert & delete rates
	autovacuum_vacuum_scale_factor = 0.01,
	autovacuum_vacuum_threshold = 5000,
	autovacuum_analyze_scale_factor = 0.005,
	autovacuum_analyze_threshold = 1000,
	autovacuum_vacuum_cost_delay = 0
);
119
+
120
+
121
-- Statement-level trigger: copies the IDs of all rows inserted by the
-- statement into unread_events. poll_for_events later drains that
-- table and dispatches the events to subscriptions.
CREATE OR REPLACE FUNCTION mark_events_as_unread()
RETURNS TRIGGER AS $$
BEGIN
	-- "NEW" here is the transition table declared on the trigger
	INSERT INTO unread_events (event_id)
	SELECT ins.id
	FROM NEW AS ins;

	-- return value is ignored for AFTER ... FOR EACH STATEMENT triggers
	RETURN NULL;
END
$$ LANGUAGE plpgsql VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb;

CREATE TRIGGER mark_events_as_unread_trigger
AFTER INSERT ON events
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE FUNCTION mark_events_as_unread();
139
+
140
-- Builds a partition name of the form "<table>_<YYYYMMDDHH24MI>".
-- Returns NULL if either argument is NULL (STRICT).
--
-- Declared STABLE, not IMMUTABLE: to_char() on a timestamptz depends on
-- the session TimeZone and the text form of a regclass depends on the
-- search_path, so the result may differ between sessions and must not
-- be constant-folded into cached plans.
-- NOTE(review): because of the TimeZone dependency, all maintenance
-- callers should run with the same TimeZone setting - confirm.
CREATE OR REPLACE FUNCTION get_time_partition_name(
	table_id regclass,
	ts timestamptz
) RETURNS TEXT AS $$
	SELECT table_id || '_' || to_char(ts, 'YYYYMMDDHH24MI')
$$ LANGUAGE sql STABLE STRICT PARALLEL SAFE;
146
+
147
-- Groups the partitions of "table_id" into runs of contiguous ranges.
-- Each output row is one run: its lowest lower bound, its highest upper
-- bound, and the OIDs of the partitions it contains.
-- Bounds are parsed out of the textual partition bound expression.
CREATE OR REPLACE FUNCTION get_partitions_and_bounds(
	table_id regclass
) RETURNS TABLE(
	lower_bound event_id,
	upper_bound event_id,
	partition_ids oid[]
) AS $$
	WITH parsed AS (
		SELECT
			child.oid AS part_oid,
			REGEXP_MATCH(
				pg_get_expr(child.relpartbound, child.oid),
				'^FOR VALUES FROM \(''(.*)''\) TO \(''(.*)''\)$'
			) AS bounds
		FROM pg_inherits AS inh
		INNER JOIN pg_class AS child ON child.oid = inh.inhrelid
		WHERE inh.inhparent = table_id
	),
	-- gaps-and-islands (https://dba.stackexchange.com/a/101010):
	-- "step" is TRUE on a partition that does not start where the
	-- previous one ended, and NULL otherwise
	stepped AS (
		SELECT
			p.part_oid,
			p.bounds[1] AS lo,
			p.bounds[2] AS hi,
			CASE
				WHEN LAG(p.bounds[2]) OVER (ORDER BY p.bounds[1]) < p.bounds[1]
					THEN TRUE
			END AS step
		FROM parsed AS p
	),
	-- running count of steps = island number
	islands AS (
		SELECT
			s.part_oid,
			s.lo,
			s.hi,
			count(s.step) OVER (ORDER BY s.lo) AS grp
		FROM stepped AS s
	)
	SELECT
		MIN(i.lo),
		MAX(i.hi),
		array_agg(i.part_oid)
	FROM islands AS i
	GROUP BY i.grp;
$$ LANGUAGE sql STABLE PARALLEL SAFE SECURITY INVOKER
SET search_path TO pgmb;
185
+
186
-- Partition maintenance for a table that is range-partitioned by event ID.
-- Creates every missing partition from the current interval up to
-- "future_interval" ahead, then drops partitions whose name sorts before
-- the partition name for (current interval - retention_period).
-- maintain_events_table() supplies the intervals from the "config" table.
--
-- Params:
--   table_id            the partitioned parent table
--   partition_interval  width of one partition (at least 1 minute)
--   future_interval     how far ahead to pre-create (>= partition_interval)
--   retention_period    how far back partitions are kept
--   additional_sql      optional statement run for each new partition;
--                       every '$1' in it is replaced by the partition name
--   current_ts          reference time (defaults to NOW())
--
-- Does nothing if another transaction currently holds the maintenance
-- lock for the same table.
--
-- NOTE(review): partition names are derived from the text form of
-- table_id, so this should be called with the table's schema on the
-- search_path (maintain_events_table pins it to pgmb).
CREATE OR REPLACE FUNCTION maintain_time_partitions_using_event_id(
	table_id regclass,
	partition_interval INTERVAL,
	future_interval INTERVAL,
	retention_period INTERVAL,
	additional_sql TEXT DEFAULT NULL,
	current_ts timestamptz DEFAULT NOW()
)
RETURNS void AS $$
DECLARE
	-- start of the partition interval that contains current_ts
	ts_trunc timestamptz := date_bin(partition_interval, current_ts, '2000-1-1');
	oldest_pt_to_keep text := pgmb
		.get_time_partition_name(table_id, ts_trunc - retention_period);
	p_info RECORD;
	lock_key CONSTANT BIGINT :=
		hashtext('pgmb.maintain_tp.' || table_id::text);
	ranges_to_create tstzrange[];
	cur_range tstzrange;
BEGIN
	ASSERT partition_interval >= interval '1 minute',
		'partition_interval must be at least 1 minute';
	ASSERT future_interval >= partition_interval,
		'future_interval must be at least as large as partition_interval';

	IF NOT pg_try_advisory_xact_lock(lock_key) THEN
		-- another process is already maintaining partitions for this table
		RETURN;
	END IF;

	-- Work out which time ranges still need a partition. Ranges are
	-- half-open [from, to), matching partition bound semantics, and all
	-- existing partitions are merged into one multirange first so each
	-- wanted interval is subtracted exactly once (no duplicates even if
	-- the interval touches several existing runs).
	WITH covered AS (
		SELECT COALESCE(
			range_agg(
				tstzrange(
					pgmb.extract_date_from_event_id(pb.lower_bound),
					pgmb.extract_date_from_event_id(pb.upper_bound),
					'[)'
				)
			),
			'{}'::tstzmultirange
		) AS ranges
		FROM pgmb.get_partitions_and_bounds(table_id) AS pb
	),
	wanted AS (
		SELECT tstzrange(gs.dt, gs.dt + partition_interval, '[)') AS range
		FROM generate_series(
			ts_trunc,
			ts_trunc + future_interval,
			partition_interval
		) AS gs(dt)
	)
	SELECT ARRAY_AGG(u.range ORDER BY lower(u.range))
	INTO ranges_to_create
	FROM wanted AS w
	CROSS JOIN covered AS c
	CROSS JOIN LATERAL unnest(w.range::tstzmultirange - c.ranges) AS u(range);

	ranges_to_create := COALESCE(ranges_to_create, ARRAY[]::tstzrange[]);

	-- create the missing partitions, oldest first
	FOREACH cur_range IN ARRAY ranges_to_create LOOP
		DECLARE
			start_ev_id pgmb.event_id := pgmb.create_event_id(lower(cur_range), 0);
			end_ev_id pgmb.event_id := pgmb.create_event_id(upper(cur_range), 0);
			pt_name TEXT := pgmb.get_time_partition_name(table_id, lower(cur_range));
		BEGIN
			RAISE NOTICE 'creating partition "%". start: %, end: %',
				pt_name, lower(cur_range), upper(cur_range);

			-- %s for the regclass: its text form is already quoted and
			-- schema-qualified as needed, quoting it again would break it
			EXECUTE FORMAT(
				'CREATE TABLE %I PARTITION OF %s FOR VALUES FROM (%L) TO (%L)',
				pt_name, table_id, start_ev_id, end_ev_id
			);

			IF additional_sql IS NOT NULL THEN
				EXECUTE REPLACE(additional_sql, '$1', quote_ident(pt_name));
			END IF;
		END;
	END LOOP;

	-- Drop old partitions: names embed a fixed-width timestamp, so a
	-- plain text comparison orders them chronologically.
	FOR p_info IN (
		SELECT inhrelid::regclass AS child
		FROM pg_catalog.pg_inherits
		WHERE inhparent = table_id
			AND inhrelid::regclass::text < oldest_pt_to_keep
	) LOOP
		EXECUTE format('DROP TABLE %s', p_info.child);
	END LOOP;
END;
$$ LANGUAGE plpgsql VOLATILE PARALLEL UNSAFE SECURITY DEFINER;
286
+
287
-- Returns the partition of "table_id" that is current at "current_ts":
-- the partition with the greatest name that does not sort after the
-- name a partition starting at current_ts would have. NULL if none.
--
-- Ordering is by partition name (which embeds a fixed-width timestamp),
-- not by OID: OIDs only reflect creation order, which need not match
-- chronological order (e.g. a gap filled in later).
CREATE OR REPLACE FUNCTION get_current_partition(
	table_id regclass,
	current_ts timestamptz DEFAULT NOW()
) RETURNS regclass AS $$
	SELECT p.inhrelid::regclass
	FROM pg_catalog.pg_inherits AS p
	WHERE p.inhparent = table_id
		AND p.inhrelid::regclass::text
			<= pgmb.get_time_partition_name(table_id, current_ts)
	ORDER BY p.inhrelid::regclass::text DESC
	LIMIT 1
$$ LANGUAGE sql STABLE PARALLEL SAFE SECURITY DEFINER;
299
+
300
-- subscriptions: types and helper functions ----------------

-- Identifier of a subscription group, up to 48 characters.
CREATE DOMAIN group_id AS VARCHAR(48);
303
+
304
-- Generates a subscription ID: an event-style ID for the current
-- transaction time with the 'pm' prefix swapped for 'su'.
CREATE OR REPLACE FUNCTION create_subscription_id()
RETURNS subscription_id AS $$
	SELECT 'su' || substr(
		create_event_id(NOW(), create_random_bigint()),
		3
	);
$$ LANGUAGE sql VOLATILE STRICT PARALLEL SAFE SECURITY DEFINER
SET search_path TO pgmb;
312
+
313
-- subscription groups and subscriptions ----------------

-- One row per consumer group, holding the group's read cursor.
CREATE TABLE subscription_groups (
	id group_id PRIMARY KEY,
	created_at TIMESTAMPTZ DEFAULT NOW(),
	-- by default a new group starts reading from the moment it is created
	last_read_event_id event_id DEFAULT create_event_id(NOW(), 0)
);

CREATE TABLE subscriptions (
	-- unique identifier of the subscription
	id subscription_id PRIMARY KEY DEFAULT create_subscription_id(),
	-- the group this subscription belongs to; subscriptions of one
	-- group are read together in one batch
	group_id group_id NOT NULL REFERENCES subscription_groups(id)
		ON DELETE RESTRICT,
	-- SQL boolean expression deciding which events match. The events
	-- row is available as "e", this subscriptions row as "s".
	-- Example: e.topic = s.params->>'topic'
	conditions_sql TEXT NOT NULL DEFAULT 'TRUE',
	-- parameters for conditions_sql (indexed). Prefer sharing one
	-- conditions_sql across many subscriptions and varying params.
	params JSONB NOT NULL DEFAULT '{}'::jsonb,

	-- de-duplication key over (group, condition, params).
	-- NOTE(review): hashtext()/jsonb_hash() look like 32-bit hashes, so
	-- unrelated subscriptions could collide on this UNIQUE column - confirm.
	identity bigint GENERATED ALWAYS AS (
		hashtext(
			group_id
			|| '/' || conditions_sql
			|| '/' || jsonb_hash(params)::text
		)
	) STORED UNIQUE,
	-- when this subscription was last active
	last_active_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
	-- time after last_active_at at which the subscription expires;
	-- NULL means it never expires
	expiry_interval INTERVAL
);
352
+
353
-- timestamptz + interval, wrapped in a function that is declared
-- IMMUTABLE so the sum can be used inside an index expression.
CREATE FUNCTION add_interval_imm(tstz TIMESTAMPTZ, itvl INTERVAL)
RETURNS TIMESTAMPTZ AS $$
	SELECT tstz + itvl;
$$ LANGUAGE sql IMMUTABLE PARALLEL SAFE
SET search_path TO pgmb;
359
+
360
-- Index for finding expired subscriptions per group. The expiry time
-- is computed in the index expression instead of a generated
-- "expires_at" column, because a separate generated column ran into a
-- deadlock issue.
CREATE INDEX ON subscriptions (
	group_id,
	add_interval_imm(last_active_at, expiry_interval)
)
WHERE expiry_interval IS NOT NULL;
367
+
368
-- Creates the "sub_gin" index on subscriptions. With btree_gin the
-- index covers (conditions_sql, params); when the extension is not
-- available on this server it falls back to a plain GIN index on params.
DO $$
BEGIN
	IF EXISTS (
		SELECT 1
		FROM pg_available_extensions
		WHERE name = 'btree_gin'
	) THEN
		CREATE EXTENSION IF NOT EXISTS btree_gin;
		-- fastupdate = false makes subscription writes slower, but keeps
		-- the costlier "poll_for_events" lookups fast
		CREATE INDEX "sub_gin" ON subscriptions USING GIN(conditions_sql, params)
			WITH (fastupdate = false);
	ELSE
		RAISE NOTICE 'btree_gin extension is not available, using
regular GIN index for subscriptions.params';
		CREATE INDEX "sub_gin" ON subscriptions USING GIN(params)
			WITH (fastupdate = false);
	END IF;
END
$$;
395
+
396
-- The distinct conditions_sql values currently in use. A change in this
-- view's contents is the signal to regenerate "poll_for_events".
CREATE MATERIALIZED VIEW IF NOT EXISTS subscription_cond_sqls AS (
	SELECT DISTINCT conditions_sql
	FROM subscriptions
	ORDER BY conditions_sql
);

-- unique index on the view's only column
CREATE UNIQUE INDEX IF NOT EXISTS subscription_cond_sqls_idx
	ON subscription_cond_sqls (conditions_sql);
406
+
407
-- Events dispatched to each subscription, per group. Like "events" this
-- table is insert-only; groups advance a cursor over "id" as they read.
-- Cursor movement is safe because a single writer (poll_for_events)
-- fills the table while any number of readers consume it.
CREATE TABLE IF NOT EXISTS subscription_events (
	-- dispatch ID (event-ID format); also the partition key
	id event_id,
	group_id group_id,
	-- the dispatched event
	event_id event_id,
	subscription_id subscription_id
) PARTITION BY RANGE (id);

-- serves "rows of group G after cursor C" lookups
CREATE INDEX IF NOT EXISTS subscription_events_group_idx
	ON subscription_events (group_id, id);
421
+
422
-- A role with minimal database access. User-supplied SQL is executed
-- under it, so an accidentally bad or malicious condition cannot reach
-- much of the database.
DO $$
BEGIN
	PERFORM 1 FROM pg_roles WHERE rolname = 'pgmb_reader';
	IF NOT FOUND THEN
		CREATE ROLE pgmb_reader NOLOGIN NOSUPERUSER NOCREATEDB
			NOCREATEROLE NOINHERIT NOREPLICATION;
	END IF;
END
$$;
435
+
436
-- Schema access for pgmb_reader: USAGE, plus CREATE because functions
-- are created in this schema while running as that role.
GRANT USAGE, CREATE ON SCHEMA pgmb TO pgmb_reader;

-- read-only tables
GRANT SELECT ON TABLE events TO pgmb_reader;
GRANT SELECT ON TABLE config TO pgmb_reader;
GRANT SELECT ON TABLE subscriptions TO pgmb_reader;

-- the unread queue is read, locked and drained
GRANT SELECT, UPDATE, DELETE ON TABLE unread_events TO pgmb_reader;

-- "subscription_events" may be written (INSERT, UPDATE) but not read
-- or deleted from
GRANT UPDATE, INSERT ON TABLE subscription_events TO pgmb_reader;
444
+
445
+ SET ROLE pgmb_reader;
446
-- Row trigger that rejects a subscription whose conditions_sql does not
-- parse/plan as a boolean join condition between an events row "e" and
-- a subscriptions row "s". The probe query joins an empty events set,
-- so it returns no rows.
--
-- The condition is checked wrapped in parentheses because that is how
-- prepare_poll_for_events_fn later embeds it; text that is only valid
-- unparenthesised (e.g. "TRUE WHERE ...") is therefore rejected here
-- instead of breaking the generated poll_for_events function.
CREATE OR REPLACE FUNCTION validate_subscription_conditions_sql()
RETURNS TRIGGER AS $$
BEGIN
	EXECUTE 'SELECT * FROM jsonb_populate_recordset(NULL::pgmb.events, ''[]'') e
		INNER JOIN jsonb_populate_recordset(NULL::pgmb.subscriptions, ''[{}]'') s
		ON (' || NEW.conditions_sql || ')';
	RETURN NEW;
END;
$$ LANGUAGE plpgsql STABLE PARALLEL SAFE
SET search_path TO pgmb
SECURITY DEFINER;
458
+ RESET ROLE;
459
+
460
-- Check conditions_sql on every row written to subscriptions, before
-- the row is stored.
CREATE TRIGGER validate_subscription_conditions_sql_trigger
	BEFORE INSERT OR UPDATE ON subscriptions
	FOR EACH ROW
	EXECUTE FUNCTION validate_subscription_conditions_sql();
464
+
465
-- Template for the "poll_for_events" function. Whenever the set of
-- subscription conditions changes, prepare_poll_for_events_fn copies
-- this function's source, swaps the placeholder line for the combined
-- "conditions_sql" expressions and creates the result as poll_for_events.
-- Keeping it a real function gives syntax highlighting and checking
-- while editing.
--
-- Takes up to "poll_chunk_size" IDs off unread_events, matches those
-- events against subscriptions and records one subscription_events row
-- per match. Returns the number of rows inserted (0 if another poller
-- holds the lock or nothing is unread).
CREATE OR REPLACE FUNCTION poll_for_events_tmpl()
RETURNS INT AS $body$
DECLARE
	lock_key CONSTANT BIGINT :=
		hashtext('pgmb.poll_for_events');
	chunk_size INT := get_config_value('poll_chunk_size')::INT;
	-- base of the per-row sequence used in the dispatch IDs
	start_num BIGINT := create_random_bigint();

	read_ids event_id[];
	min_id event_id;
	max_id event_id;

	write_start TIMESTAMPTZ;
	inserted_rows integer;
BEGIN
	-- single poller at a time; the lock lasts until the transaction ends
	IF NOT pg_try_advisory_xact_lock(lock_key) THEN
		RETURN 0;
	END IF;

	-- claim a chunk of unread IDs (skipping rows locked elsewhere),
	-- remove them from the queue and remember them
	WITH to_delete AS (
		SELECT td.event_id
		FROM unread_events td
		WHERE td.event_id < create_event_id(NOW(), 0)
		FOR UPDATE SKIP LOCKED
		LIMIT chunk_size
	),
	deleted AS (
		DELETE FROM unread_events re
		USING to_delete td
		WHERE re.event_id = td.event_id
	)
	SELECT
		MIN(td.event_id),
		MAX(td.event_id),
		ARRAY_AGG(td.event_id)
	INTO min_id, max_id, read_ids
	FROM to_delete td;

	-- nothing claimed
	IF max_id IS NULL THEN
		RETURN 0;
	END IF;

	write_start := clock_timestamp();

	-- the min/max range predicate lets the planner prune partitions
	WITH chunk_events AS (
		SELECT e.*
		FROM events e
		INNER JOIN unnest(read_ids) r(id) ON e.id = r.id
		WHERE e.id BETWEEN min_id AND max_id
	)
	INSERT INTO subscription_events(id, group_id, subscription_id, event_id)
	SELECT
		create_event_id(write_start, start_num + row_number() OVER ()),
		s.group_id,
		s.id,
		e.id
	FROM chunk_events e
	INNER JOIN subscriptions s ON
		-- directed events go to their subscription only ...
		s.id = e.subscription_id
		-- ... undirected ones to every subscription whose condition holds
		OR (
			e.subscription_id IS NULL
			AND (
				-- Do not edit this line directly. Will be replaced
				-- in the prepared function.
				TRUE -- CONDITIONS_SQL_PLACEHOLDER --
			)
		)
	ON CONFLICT DO NOTHING;

	GET DIAGNOSTICS inserted_rows = ROW_COUNT;

	RETURN inserted_rows;
END;
$body$ LANGUAGE plpgsql VOLATILE STRICT PARALLEL UNSAFE
SET search_path TO pgmb
SECURITY DEFINER;
550
+
551
-- (Re)creates pgmb.poll_for_events from the poll_for_events_tmpl
-- template, with the template's placeholder replaced by the given
-- condition expressions. Each expression is emitted as
--   (<expr>) AND s.conditions_sql = '<expr as literal>'
-- and the pieces are OR-ed together, so an expression only applies to
-- subscriptions that carry exactly that conditions_sql.
--
-- Params:
--   sql_statements  the distinct conditions_sql values; an empty array
--                   produces a function that matches nothing
-- Raises an exception if the template function cannot be found.
CREATE OR REPLACE FUNCTION prepare_poll_for_events_fn(
	sql_statements TEXT[]
) RETURNS VOID AS $$
DECLARE
	tmpl_proc_name constant TEXT :=
		'poll_for_events_tmpl';
	tmpl_proc_placeholder constant TEXT :=
		'TRUE -- CONDITIONS_SQL_PLACEHOLDER --';
	condition_sql TEXT;
	proc_src TEXT;
BEGIN
	IF sql_statements = '{}' THEN
		-- no subscriptions, so just use 'FALSE' to avoid any matches
		sql_statements := ARRAY['FALSE'];
	END IF;

	-- Build the combined condition. The expressions are concatenated
	-- directly and quoted with quote_literal(); they must NOT be passed
	-- through format() as part of the template, or any '%' they contain
	-- (e.g. LIKE 'foo%') would be read as a format specifier.
	SELECT
		'('
		|| string_agg(
			'(' || arr.stmt || ') AND s.conditions_sql = '
				|| quote_literal(arr.stmt),
			') OR ('
			ORDER BY arr.ord
		)
		|| ')'
	INTO condition_sql
	FROM unnest(sql_statements) WITH ORDINALITY AS arr(stmt, ord);

	condition_sql := FORMAT('/* updated at %s */', NOW()) || condition_sql;

	-- fetch the source of the template procedure
	SELECT pg_get_functiondef(p.oid) INTO proc_src
	FROM pg_proc AS p
	WHERE p.proname = tmpl_proc_name
		AND p.pronamespace = 'pgmb'::regnamespace;
	IF proc_src IS NULL THEN
		RAISE EXCEPTION 'Template procedure % not found', tmpl_proc_name;
	END IF;

	-- Rename first, then splice in the conditions, so the rename can
	-- never alter text that came from a subscription's conditions_sql.
	proc_src := REPLACE(proc_src, tmpl_proc_name, 'poll_for_events');
	proc_src := REPLACE(proc_src, tmpl_proc_placeholder, condition_sql);

	-- the new poll_for_events function will be created with
	-- the pgmb_reader role, to avoid a bad "conditions_sql"
	-- from having any destructive access to the database.
	SET ROLE pgmb_reader;
	EXECUTE proc_src;
	RESET ROLE;
END;
$$ LANGUAGE plpgsql VOLATILE STRICT PARALLEL UNSAFE
SET search_path TO pgmb
SECURITY INVOKER;
604
+
605
+ SELECT prepare_poll_for_events_fn('{}'::text[]);
606
+
607
-- Statement trigger on subscriptions: refreshes the view of distinct
-- conditions_sql values and, only if that set actually changed,
-- regenerates the poll_for_events function.
CREATE OR REPLACE FUNCTION refresh_subscription_read_statements()
RETURNS TRIGGER AS $$
DECLARE
	sqls_before TEXT[];
	sqls_after TEXT[];
BEGIN
	-- snapshot of the view before the refresh
	sqls_before := ARRAY(
		SELECT v.conditions_sql
		FROM subscription_cond_sqls AS v
		ORDER BY v.conditions_sql
	);

	REFRESH MATERIALIZED VIEW CONCURRENTLY subscription_cond_sqls;

	sqls_after := ARRAY(
		SELECT v.conditions_sql
		FROM subscription_cond_sqls AS v
		ORDER BY v.conditions_sql
	);

	-- regenerate unless the two snapshots are known to be equal
	IF (sqls_after = sqls_before) IS NOT TRUE THEN
		PERFORM prepare_poll_for_events_fn(sqls_after);
	END IF;

	RETURN NULL;
END
$$ LANGUAGE plpgsql VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb
SECURITY INVOKER;

CREATE TRIGGER refresh_subscription_read_statements_trigger
AFTER INSERT OR UPDATE OR DELETE ON subscriptions
FOR EACH STATEMENT
EXECUTE FUNCTION refresh_subscription_read_statements();
645
+
646
-- Reads events by ID, ordered by ID. Exists because postgres does not
-- prune partitions for a plain IN/JOIN on a partitioned table; the
-- explicit min/max range predicate below lets it do so.
--
-- Params:
--   event_ids  IDs to fetch; IDs with no matching event are skipped
-- Returns no rows for an empty array or (being STRICT) a NULL argument.
CREATE OR REPLACE FUNCTION read_events(
	event_ids event_id[]
) RETURNS SETOF events AS $$
DECLARE
	max_id event_id;
	min_id event_id;
BEGIN
	-- cardinality() is 0 for an empty array, whereas
	-- array_length(x, 1) is NULL and would never equal 0
	IF cardinality(event_ids) = 0 THEN
		RETURN;
	END IF;

	-- get min and max ids, allows PG to correctly prune partitions
	SELECT
		MAX(u.eid),
		MIN(u.eid)
	INTO max_id, min_id
	FROM unnest(event_ids) AS u(eid);

	RETURN QUERY
	SELECT e.*
	FROM events e
	INNER JOIN unnest(event_ids) AS u(eid) ON e.id = u.eid
	WHERE e.id <= max_id AND e.id >= min_id
	ORDER BY u.eid;
END;
$$ LANGUAGE plpgsql STRICT STABLE PARALLEL SAFE
SET search_path TO pgmb;
676
+
677
-- Reads the next batch of events for a subscription group.
--
-- Params:
--   gid         the group to read for
--   cursor      read strictly after this ID; when NULL the group's
--               stored last_read_event_id is used
--   chunk_size  max number of subscription_events rows to consume
--   peek        when TRUE, read without taking the group's advisory
--               lock, so debugging never blocks real readers
-- Returns one row per event with the IDs of the subscriptions it matched
-- and "next_cursor", the highest subscription_events ID consumed by this
-- call (the value to pass as the next cursor). Returns nothing when the
-- lock is held elsewhere (and peek is FALSE) or no cursor can be found.
--
-- Unless peek is TRUE, a successful call leaves the group's advisory
-- lock held; release_group_lock / set_group_cursor(release_lock := TRUE)
-- give it back.
-- NOTE(review): the function takes a lock yet is labelled
-- STABLE PARALLEL SAFE; labels left unchanged here - confirm intent.
CREATE OR REPLACE FUNCTION read_next_events(
	gid VARCHAR(48),
	cursor event_id DEFAULT NULL,
	chunk_size INT DEFAULT get_config_value('poll_chunk_size')::INT,
	peek BOOLEAN DEFAULT FALSE
) RETURNS TABLE(
	id event_id,
	topic VARCHAR(255),
	payload JSONB,
	metadata JSONB,
	subscription_ids subscription_id[],
	next_cursor event_id
) AS $$
DECLARE
	lock_key CONSTANT BIGINT :=
		hashtext('pgmb.read_next_events.' || gid);
BEGIN
	-- Lock the group so that temporary or accidental duplicate readers
	-- don't interfere with each other. Nested IFs rather than
	-- "A AND B": SQL does not guarantee evaluation order, and a peek
	-- must never acquire the lock.
	IF NOT peek THEN
		IF NOT pg_try_advisory_lock(lock_key) THEN
			RETURN;
		END IF;
	END IF;

	-- no cursor given: fall back to the group's last read event id
	IF cursor IS NULL THEN
		SELECT sc.last_read_event_id
		FROM subscription_groups sc
		WHERE sc.id = gid
		INTO cursor;
	END IF;
	-- if still null, don't return anything
	IF cursor IS NULL THEN
		RETURN;
	END IF;

	RETURN QUERY WITH next_events AS (
		SELECT
			se.id,
			se.event_id,
			se.subscription_id
		FROM subscription_events se
		INNER JOIN subscriptions s ON s.id = se.subscription_id
		WHERE se.group_id = gid
			AND se.id < create_event_id(NOW(), 0)
			AND se.id > cursor
		-- The chunk must be the LOWEST ids after the cursor: next_cursor
		-- is MAX(id) of the chunk, so any smaller id left out of an
		-- arbitrary chunk would be skipped for good.
		ORDER BY se.id
		LIMIT chunk_size
	),
	next_events_grp AS (
		SELECT
			ne.event_id,
			ARRAY_AGG(ne.subscription_id) AS subscription_ids
		FROM next_events ne
		GROUP BY ne.event_id
		ORDER BY ne.event_id
	)
	SELECT
		e.id,
		e.topic,
		e.payload,
		e.metadata,
		ne.subscription_ids,
		(SELECT MAX(ne2.id)::event_id FROM next_events ne2)
	FROM read_events(ARRAY(SELECT ne.event_id FROM next_events_grp ne)) e
	INNER JOIN next_events_grp ne ON ne.event_id = e.id
	-- the join does not preserve read_events' ordering by itself
	ORDER BY e.id;
END
$$ LANGUAGE plpgsql STABLE PARALLEL SAFE
SET search_path TO pgmb
SECURITY INVOKER;
749
+
750
-- Re-reads the events already dispatched to one subscription after a
-- given event ID.
--
-- Params:
--   gid            group of the subscription
--   sid            the subscription
--   from_event_id  replay events strictly after this ID
--   max_events     upper limit on the number of events
-- Raises an exception when more than max_events events qualify.
CREATE OR REPLACE FUNCTION replay_events(
	gid VARCHAR(48),
	sid VARCHAR(24),
	from_event_id event_id,
	max_events INT
) RETURNS SETOF events AS $$
DECLARE
	-- upper bound: an ID for "now"
	now_id event_id := create_event_id(NOW(), 0);
	event_ids event_id[];
BEGIN
	-- fetch one more than allowed so an overflow can be detected
	SELECT ARRAY_AGG(se.event_id) INTO event_ids
	FROM subscription_events se
	WHERE se.group_id = gid
		AND se.subscription_id = sid
		AND se.event_id > from_event_id
		AND se.event_id <= now_id
		-- "id" is filtered by the same range, because
		-- 1. se.id and e.id share one format, and a row is inserted
		--    into se only after its e row exists; so rows with
		--    se.event_id > from_event_id also have se.id > from_event_id
		-- 2. it lets postgres prune the partitions to read
		AND se.id > from_event_id
		AND se.id <= now_id
	LIMIT (max_events + 1);

	IF cardinality(event_ids) > max_events THEN
		RAISE EXCEPTION
			'Too many events to replay. Please replay in smaller batches.';
	END IF;

	RETURN QUERY SELECT * FROM read_events(event_ids);
END $$ LANGUAGE plpgsql STABLE PARALLEL SAFE
SET search_path TO pgmb
SECURITY INVOKER;
784
+
785
-- Releases the advisory lock read_next_events takes for a group.
-- The key is derived exactly as in read_next_events.
CREATE OR REPLACE FUNCTION release_group_lock(gid VARCHAR(48))
RETURNS VOID AS $$
BEGIN
	PERFORM pg_advisory_unlock(
		hashtext('pgmb.read_next_events.' || gid)::BIGINT
	);
END
$$ LANGUAGE plpgsql VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb;
795
+
796
-- Stores a group's read cursor, creating the group row if needed.
--
-- Params:
--   gid           the group
--   new_cursor    value to store as last_read_event_id
--   release_lock  when TRUE, also release the group's read lock
CREATE OR REPLACE FUNCTION set_group_cursor(
	gid VARCHAR(48),
	new_cursor event_id,
	release_lock BOOLEAN
) RETURNS VOID AS $$
BEGIN
	INSERT INTO subscription_groups (id, last_read_event_id)
	VALUES (gid, new_cursor)
	ON CONFLICT (id) DO UPDATE
		SET last_read_event_id = EXCLUDED.last_read_event_id;

	IF release_lock THEN
		PERFORM release_group_lock(gid);
	END IF;
END
$$ LANGUAGE plpgsql VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb;
817
+
818
-- Runs partition maintenance for "events" and then for
-- "subscription_events", using the intervals stored in "config".
--
-- Params:
--   current_ts  reference time handed to the maintenance function
CREATE OR REPLACE FUNCTION maintain_events_table(
	current_ts timestamptz DEFAULT NOW()
)
RETURNS VOID AS $$
DECLARE
	pi INTERVAL := get_config_value('partition_interval');
	fic INTERVAL := get_config_value('future_intervals_to_create');
	rp INTERVAL := get_config_value('partition_retention_period');
	-- Applied to every new partition: rows are only ever inserted, never
	-- updated or deleted, so autovacuum is switched off and pages are
	-- filled completely.
	storage_sql CONSTANT TEXT := 'ALTER TABLE $1 SET(
		fillfactor = 100,
		autovacuum_enabled = false,
		toast.autovacuum_enabled = false
	);';
	target regclass;
BEGIN
	FOREACH target IN ARRAY
		ARRAY['pgmb.events', 'pgmb.subscription_events']::regclass[]
	LOOP
		PERFORM maintain_time_partitions_using_event_id(
			target,
			partition_interval := pi,
			future_interval := fic,
			retention_period := rp,
			additional_sql := storage_sql,
			current_ts := current_ts
		);
	END LOOP;
END;
$$ LANGUAGE plpgsql VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb;
861
+
862
+ -- create the initial partitions
863
+ SELECT maintain_events_table();
864
+
865
+ -- triggers to add events for specific tables ---------------------------
866
+
867
-- Builds the lower-cased topic "<schema>.<table>.<kind>".
-- Eg. ('public', 'contacts', 'INSERT') -> 'public.contacts.insert'
-- Returns NULL if any argument is NULL (STRICT).
CREATE OR REPLACE FUNCTION create_topic(
	schema_name name,
	table_name name,
	kind varchar(16)
) RETURNS varchar(255) AS $$
	SELECT lower(
		schema_name || '.' || table_name || '.' || kind
	)
$$ LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE;
876
+
877
-- Returns the key/value pairs of object "a" that are not present with
-- the same value in object "b". JSON null values and missing keys are
-- treated as equal (nulls are ignored on both sides).
-- Eg. jsonb_diff('{"a": 1, "b": 2, "c": null}', '{"a": 1, "b": null}') = '{"b": 2}'
--
-- Returns '{}' when there is no difference: the aggregate over zero
-- rows is NULL, and callers store the result in NOT NULL columns.
-- Returns NULL only if an argument is NULL (STRICT).
CREATE OR REPLACE FUNCTION jsonb_diff(a jsonb, b jsonb)
RETURNS jsonb AS $$
	SELECT COALESCE(jsonb_object_agg(d.key, d.value), '{}'::jsonb)
	FROM (
		SELECT key, value FROM jsonb_each(a) WHERE value != 'null'::jsonb
		EXCEPT
		SELECT key, value FROM jsonb_each(b) WHERE value != 'null'::jsonb
	) AS d
$$ LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE;
888
+
889
-- Serialises a row for use as an event payload and says whether an
-- event should be emitted for it.
--
-- Params:
--   tabl        OID of the table the row belongs to (unused here)
--   op          the operation that produced the row (unused here)
--   record      the row itself
-- Outputs:
--   serialised  the row as JSONB; set regardless of "emit"
--   emit        whether to emit the event; this default always says TRUE
CREATE OR REPLACE FUNCTION serialise_record_for_event(
	tabl oid,
	op TEXT,
	record RECORD,
	serialised OUT JSONB,
	emit OUT BOOLEAN
) AS $$
BEGIN
	emit := TRUE;
	serialised := to_jsonb(record);
END
$$ LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE
SECURITY INVOKER;
908
+
909
-- Trigger function that pushes table changes to the events table.
-- Meant to run as an AFTER ... FOR EACH STATEMENT trigger whose transition
-- tables are named NEW and/or OLD (as created by push_table_mutations).
--
-- For every affected row for which serialise_record_for_event() says
-- "emit", one event is inserted with:
--   topic    = create_topic(schema, table, operation)
--   payload  = INSERT: the serialised new row
--              DELETE: the serialised old row
--              UPDATE: jsonb_diff(new row, old row)
--   metadata = UPDATE only: {"old": <serialised old row>}
-- JSON nulls are stripped from the serialised rows.
--
-- Runs as SECURITY DEFINER, so pg_temp is pinned to the end of the
-- search_path: otherwise a temporary table named "events" in the calling
-- session would shadow the real one.
CREATE OR REPLACE FUNCTION push_table_event()
RETURNS TRIGGER AS $$
DECLARE
	-- random base; the row's position within the statement is added to it
	-- so each event of this statement is created with a different "rand"
	start_num BIGINT = create_random_bigint();
BEGIN
	IF TG_OP = 'INSERT' THEN
		INSERT INTO events(id, topic, payload)
		SELECT
			create_event_id(clock_timestamp(), rand := start_num + row_number() OVER ()),
			create_topic(TG_TABLE_SCHEMA, TG_TABLE_NAME, TG_OP),
			jsonb_strip_nulls(s.data)
		FROM NEW n
		CROSS JOIN LATERAL
			serialise_record_for_event(TG_RELID, TG_OP, n) AS s(data, emit)
		WHERE s.emit;
	ELSIF TG_OP = 'DELETE' THEN
		INSERT INTO events(id, topic, payload)
		SELECT
			create_event_id(clock_timestamp(), rand := start_num + row_number() OVER ()),
			create_topic(TG_TABLE_SCHEMA, TG_TABLE_NAME, TG_OP),
			jsonb_strip_nulls(s.data)
		FROM OLD o
		CROSS JOIN LATERAL
			serialise_record_for_event(TG_RELID, TG_OP, o) AS s(data, emit)
		WHERE s.emit;
	ELSIF TG_OP = 'UPDATE' THEN
		-- For updates, we can send both old and new data
		-- NOTE(review): old and new rows are paired purely by their
		-- row_number() in the OLD and NEW transition tables; this assumes
		-- both are scanned in the same row order -- confirm.
		INSERT INTO events(id, topic, payload, metadata)
		SELECT
			create_event_id(clock_timestamp(), rand := start_num + new_rows.rn),
			create_topic(TG_TABLE_SCHEMA, TG_TABLE_NAME, TG_OP),
			-- jsonb_diff() aggregates over zero rows (-> NULL) when the
			-- update only cleared columns; fall back to an empty object so
			-- the payload is always a JSON object
			COALESCE(
				jsonb_strip_nulls(jsonb_diff(new_rows.data, old_rows.data)),
				'{}'::jsonb
			),
			jsonb_build_object('old', jsonb_strip_nulls(old_rows.data))
		FROM (
			SELECT s.data, s.emit, row_number() OVER () AS rn
			FROM NEW n
			CROSS JOIN LATERAL
				serialise_record_for_event(TG_RELID, TG_OP, n) AS s(data, emit)
		) AS new_rows
		INNER JOIN (
			SELECT s.data, row_number() OVER () AS rn
			FROM OLD o
			CROSS JOIN LATERAL
				serialise_record_for_event(TG_RELID, TG_OP, o) AS s(data, emit)
		) AS old_rows ON new_rows.rn = old_rows.rn
		-- ignore rows where data didn't change
		WHERE new_rows.data IS DISTINCT FROM old_rows.data AND new_rows.emit;
	END IF;

	-- the return value of an AFTER statement-level trigger is ignored
	RETURN NULL;
END
$$ LANGUAGE plpgsql SECURITY DEFINER VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb, pg_temp;
962
+
963
-- Makes a table subscribable: creates statement-level AFTER triggers that
-- call push_table_event() so the table's mutations are pushed to the
-- events table.
--
-- Parameters:
--   tbl    - the table to attach the triggers to
--   insert - create the "post_insert_event" trigger (default TRUE)
--   delete - create the "post_delete_event" trigger (default TRUE)
--   update - create the "post_update_event" trigger (default TRUE)
--
-- A trigger that already exists is left untouched: the duplicate_object
-- error raised by CREATE TRIGGER is swallowed.
CREATE OR REPLACE FUNCTION push_table_mutations(
	tbl regclass,
	insert BOOLEAN DEFAULT TRUE,
	delete BOOLEAN DEFAULT TRUE,
	update BOOLEAN DEFAULT TRUE
)
RETURNS VOID AS $$
DECLARE
	-- textual form of the table reference, spliced into the DDL below
	tbl_ref CONSTANT TEXT := tbl::varchar;
BEGIN
	-- inserted rows are exposed to the trigger function as table "NEW"
	IF insert THEN
		BEGIN
			EXECUTE 'CREATE TRIGGER
				post_insert_event
				AFTER INSERT ON ' || tbl_ref || '
				REFERENCING NEW TABLE AS NEW
				FOR EACH STATEMENT
				EXECUTE FUNCTION push_table_event();';
		EXCEPTION
			WHEN duplicate_object THEN
				NULL; -- trigger already present
		END;
	END IF;

	-- deleted rows are exposed to the trigger function as table "OLD"
	IF delete THEN
		BEGIN
			EXECUTE 'CREATE TRIGGER
				post_delete_event
				AFTER DELETE ON ' || tbl_ref || '
				REFERENCING OLD TABLE AS OLD
				FOR EACH STATEMENT
				EXECUTE FUNCTION push_table_event();';
		EXCEPTION
			WHEN duplicate_object THEN
				NULL; -- trigger already present
		END;
	END IF;

	-- updates expose both the previous ("OLD") and the new ("NEW") rows
	IF update THEN
		BEGIN
			EXECUTE 'CREATE TRIGGER
				post_update_event
				AFTER UPDATE ON ' || tbl_ref || '
				REFERENCING OLD TABLE AS OLD
				NEW TABLE AS NEW
				FOR EACH STATEMENT
				EXECUTE FUNCTION push_table_event();';
		EXCEPTION
			WHEN duplicate_object THEN
				NULL; -- trigger already present
		END;
	END IF;
END
$$ LANGUAGE plpgsql SECURITY DEFINER
VOLATILE PARALLEL UNSAFE
SET search_path TO pgmb;
1017
+
1018
-- Stops the table from being subscribable, i.e. drops the
-- "post_insert_event", "post_delete_event" and "post_update_event"
-- triggers from the given table. Triggers that do not exist are skipped
-- (DROP TRIGGER IF EXISTS).
CREATE OR REPLACE FUNCTION stop_table_mutations_push(
	tbl regclass
) RETURNS VOID AS $$
DECLARE
	trigger_name TEXT;
BEGIN
	-- same order as the original statements: insert, delete, update
	FOREACH trigger_name IN ARRAY ARRAY[
		'post_insert_event',
		'post_delete_event',
		'post_update_event'
	]
	LOOP
		EXECUTE 'DROP TRIGGER IF EXISTS ' || trigger_name || ' ON ' || tbl::varchar || ';';
	END LOOP;
END
$$ LANGUAGE plpgsql SECURITY DEFINER VOLATILE
SET search_path TO pgmb;