pgoutput-client 0.0.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,187 @@
1
+ # frozen_string_literal: true
2
+
3
module Pgoutput
  module Client
    # Immutable configuration for a PostgreSQL logical replication stream.
    #
    # A configuration describes how `pgoutput-client` should connect to
    # PostgreSQL and how it should request logical replication from the server.
    # It deliberately contains transport-level settings only; parsing pgoutput
    # records and decoding PostgreSQL values belong to downstream layers.
    #
    # The object freezes itself and keeps frozen *copies* of its string
    # attributes, so it can be safely shared by transport components without
    # defensive copying and without freezing objects owned by the caller.
    #
    # @example Minimal configuration
    #   config = Pgoutput::Client::Configuration.new(
    #     database_url: "postgres://localhost/app",
    #     slot_name: "cdc_slot",
    #     publication_names: "app_publication"
    #   )
    #
    # @example Start from a known LSN and request binary values from pgoutput
    #   config = Pgoutput::Client::Configuration.new(
    #     database_url: ENV.fetch("DATABASE_URL"),
    #     slot_name: "cdc_slot",
    #     publication_names: %w[app_publication],
    #     start_lsn: "0/16B6C50",
    #     binary: true
    #   )
    #
    # @api public
    class Configuration
      # Fixed logical decoding output plugin.
      #
      # @return [String]
      DEFAULT_PLUGIN = "pgoutput"

      # Default pgoutput protocol version.
      #
      # @return [Integer]
      DEFAULT_PROTO_VERSION = 1

      # Default interval, in seconds, between standby status feedback messages.
      #
      # @return [Float]
      DEFAULT_FEEDBACK_INTERVAL = 10.0

      # @!attribute [r] database_url
      #   PostgreSQL connection URL.
      #   @return [String]
      # @!attribute [r] slot_name
      #   Logical replication slot name.
      #   @return [String]
      # @!attribute [r] publication_names
      #   Publication names requested from pgoutput.
      #   @return [Array<String>]
      # @!attribute [r] start_lsn
      #   Optional normalized starting LSN.
      #   @return [String, nil]
      # @!attribute [r] proto_version
      #   pgoutput protocol version.
      #   @return [Integer]
      # @!attribute [r] binary
      #   Whether to request binary column values from pgoutput.
      #   @return [Boolean]
      # @!attribute [r] messages
      #   Whether to request logical decoding messages from pgoutput.
      #   @return [Boolean]
      # @!attribute [r] auto_create_slot
      #   Whether the client should create the slot before streaming.
      #   @return [Boolean]
      # @!attribute [r] temporary_slot
      #   Whether a newly created slot should be temporary.
      #   @return [Boolean]
      # @!attribute [r] feedback_interval
      #   Standby feedback interval in seconds.
      #   @return [Float]
      attr_reader :database_url,
                  :slot_name,
                  :publication_names,
                  :start_lsn,
                  :proto_version,
                  :binary,
                  :messages,
                  :auto_create_slot,
                  :temporary_slot,
                  :feedback_interval

      # Build and validate a logical replication stream configuration.
      #
      # `slot_name` and every publication name are intentionally limited to
      # simple PostgreSQL identifier-like strings. This keeps command rendering
      # small and predictable while avoiding quoting rules in this transport
      # layer.
      #
      # Boolean options are checked strictly: only `true` and `false` are
      # accepted. Any other value, including `nil`, raises `ArgumentError`
      # rather than being coerced by truthiness.
      #
      # String arguments are copied before being frozen, so the objects passed
      # in by the caller are left untouched.
      #
      # @param database_url [#to_s] PostgreSQL connection URL
      # @param slot_name [#to_s] logical replication slot name
      # @param publication_names [Array<#to_s>, #to_s] one or more publication
      #   names to pass to pgoutput
      # @param start_lsn [String, Integer, nil] starting LSN as a PostgreSQL LSN
      #   string, an integer WAL position, or `nil` for `0/0`
      # @param proto_version [#to_int, #to_s] pgoutput protocol version
      # @param binary [Boolean] whether to request binary column values
      # @param messages [Boolean] whether to request logical decoding messages
      # @param auto_create_slot [Boolean] whether to create the slot before
      #   starting replication
      # @param temporary_slot [Boolean] whether to create a temporary
      #   replication slot when `auto_create_slot` is enabled
      # @param feedback_interval [#to_f, #to_s] seconds between periodic standby
      #   feedback messages
      # @return [void]
      # @raise [ConfigurationError] if the slot name or a publication name is
      #   not identifier-like, if publication names are empty, or if
      #   `proto_version` / `feedback_interval` are not positive
      # @raise [ArgumentError] if a boolean option is not exactly `true` or
      #   `false`, or if `start_lsn`, `proto_version`, or `feedback_interval`
      #   cannot be parsed
      # @raise [TypeError] if `proto_version` or `feedback_interval` is `nil`
      #   or another object that `Integer()` / `Float()` cannot convert
      def initialize(database_url:,
                     slot_name:,
                     publication_names:,
                     start_lsn: nil,
                     proto_version: DEFAULT_PROTO_VERSION,
                     binary: false,
                     messages: false,
                     auto_create_slot: false,
                     temporary_slot: false,
                     feedback_interval: DEFAULT_FEEDBACK_INTERVAL)
        @database_url = frozen_copy(database_url)
        @slot_name = validate_identifier(slot_name, "slot_name")
        @publication_names = Array(publication_names).map do |name|
          validate_identifier(name, "publication_name")
        end.freeze
        @start_lsn = normalize_lsn(start_lsn).freeze
        @proto_version = Integer(proto_version)
        @binary = boolean(binary, "binary")
        @messages = boolean(messages, "messages")
        @auto_create_slot = boolean(auto_create_slot, "auto_create_slot")
        @temporary_slot = boolean(temporary_slot, "temporary_slot")
        @feedback_interval = Float(feedback_interval)

        validate!
        freeze
      end

      # Starting LSN to render in `START_REPLICATION`.
      #
      # @return [String] normalized LSN string, defaulting to `"0/0"`
      def start_lsn_string
        start_lsn || "0/0"
      end

      private

      # Cross-field checks that run after every attribute has been coerced.
      def validate!
        raise ConfigurationError, "publication_names must not be empty" if publication_names.empty?
        raise ConfigurationError, "proto_version must be positive" unless proto_version.positive?
        raise ConfigurationError, "feedback_interval must be positive" unless feedback_interval.positive?
      end

      # Normalize an LSN given as text or as an integer WAL position into the
      # string produced by `LSN.format`; `nil` stays `nil`.
      def normalize_lsn(value)
        return nil if value.nil?
        return LSN.format(value) if value.is_a?(Integer)

        LSN.format(LSN.parse(String(value)))
      end

      # Return a frozen copy of `value` after checking that it looks like a
      # plain PostgreSQL identifier.
      def validate_identifier(value, field)
        string = frozen_copy(value)
        return string if string.match?(/\A[a-zA-Z_][a-zA-Z0-9_]*\z/)

        raise ConfigurationError, "#{field} must be a PostgreSQL identifier-like string"
      end

      # Boolean type checking helper: accepts only `true` or `false`.
      def boolean(value, name)
        return true if value == true
        return false if value == false

        raise ArgumentError, "#{name} must be true or false"
      end

      # Coerce to String and freeze a copy. `String(value)` returns `value`
      # itself when it already is a String, so freezing the result directly
      # would freeze the caller's own object.
      def frozen_copy(value)
        String(value).dup.freeze
      end
    end
  end
end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
module Pgoutput
  module Client
    # Thin wrapper around `PG::Connection` for logical replication operations.
    #
    # `Connection` hides the small amount of PostgreSQL driver plumbing needed by
    # the rest of the transport layer. It opens the connection in replication
    # mode, renders replication commands through {Commands}, and translates
    # `PG::Error` exceptions into {ConnectionError}.
    #
    # @api private
    class Connection
      # Configuration associated with this connection.
      #
      # @return [Configuration]
      attr_reader :configuration

      # Open a PostgreSQL connection in database replication mode.
      #
      # The `pg` gem is loaded lazily here. The `require` is kept outside the
      # rescued region on purpose: Ruby evaluates `PG::Error` only while
      # matching a raised exception, so if the gem were missing, a `LoadError`
      # raised inside the rescued region would be replaced by a confusing
      # `NameError` for the undefined `PG` constant.
      #
      # @param configuration [Configuration] replication configuration
      # @return [Connection] wrapper around an open `PG::Connection`
      # @raise [ConnectionError] if the `pg` gem cannot connect
      # @raise [LoadError] if the `pg` gem is not installed
      def self.open(configuration)
        require "pg"

        begin
          pg_connection = PG.connect(configuration.database_url, replication: "database")
        rescue PG::Error => e
          raise ConnectionError, e.message
        end

        new(configuration:, pg_connection:)
      end

      # Build a connection wrapper.
      #
      # This constructor is public primarily for tests and alternative connection
      # factories. Normal callers should use {.open}.
      #
      # @param configuration [Configuration] replication configuration
      # @param pg_connection [PG::Connection] connected PostgreSQL driver object
      # @return [void]
      def initialize(configuration:, pg_connection:)
        @configuration = configuration
        @pg_connection = pg_connection
      end

      # Execute PostgreSQL's `IDENTIFY_SYSTEM` replication command.
      #
      # @return [PG::Result] server identity result
      # @raise [ConnectionError] if PostgreSQL rejects the command
      def identify_system
        exec("IDENTIFY_SYSTEM")
      end

      # Create the configured logical replication slot.
      #
      # @return [PG::Result] command result
      # @raise [ConnectionError] if PostgreSQL rejects the command
      def create_replication_slot
        exec(Commands.create_replication_slot(configuration))
      end

      # Drop the configured logical replication slot.
      #
      # @return [PG::Result] command result
      # @raise [ConnectionError] if PostgreSQL rejects the command
      def drop_replication_slot
        exec(Commands.drop_replication_slot(configuration))
      end

      # Start streaming logical replication from the configured slot and LSN.
      #
      # @return [PG::Result] command result
      # @raise [ConnectionError] if PostgreSQL rejects the command
      def start_replication
        exec(Commands.start_replication(configuration))
      end

      # Receive one CopyData payload from the server.
      #
      # Delegates to `PG::Connection#get_copy_data(false)` and returns whatever
      # the driver returns.
      #
      # NOTE(review): the pg gem documents the first argument as `nonblock`, so
      # passing `false` makes the driver wait until a complete row is
      # available, and a `nil` result signals the end of the COPY stream (not
      # "no data yet"). Confirm that the stream layer expects a blocking call;
      # a polling caller would need `get_copy_data(true)`, which returns
      # `false` when no complete row is buffered.
      #
      # @return [String, nil] raw CopyData payload, or `nil` as reported by the
      #   driver
      # @raise [ConnectionError] if receiving fails
      def get_copy_data # rubocop:disable Naming/AccessorMethodName
        translate_pg_errors { @pg_connection.get_copy_data(false) }
      end

      # Send one CopyData payload to the server.
      #
      # Used for standby status feedback messages.
      #
      # @param payload [String] raw CopyData payload
      # @return [void]
      # @raise [ConnectionError] if sending fails
      def put_copy_data(payload)
        translate_pg_errors { @pg_connection.put_copy_data(payload) }
      end

      # Close the PostgreSQL connection if it is still open.
      #
      # @return [void]
      def close
        @pg_connection.close unless @pg_connection.finished?
      end

      private

      # Run one command on the underlying driver connection.
      #
      # Intentionally shadows `Kernel#exec` inside this class.
      def exec(sql)
        translate_pg_errors { @pg_connection.exec(sql) }
      end

      # Yield, re-raising any `PG::Error` as {ConnectionError} with the same
      # message so callers do not depend on driver exception classes.
      def translate_pg_errors
        yield
      rescue PG::Error => e
        raise ConnectionError, e.message
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Pgoutput
  module Client
    # Root of the pgoutput-client exception hierarchy.
    #
    # Rescuing this class catches every failure raised by the transport layer
    # while leaving unrelated Ruby or PostgreSQL driver exceptions alone.
    #
    # @api public
    class Error < StandardError
    end

    # Signals an invalid stream configuration.
    #
    # Typical causes are an empty publication list, a replication slot or
    # publication name that is not identifier-like, or a protocol version or
    # feedback interval that is not positive.
    #
    # @api public
    class ConfigurationError < Error
    end

    # Signals a replication protocol envelope that cannot be parsed.
    #
    # It covers malformed or unexpected CopyData payloads at the
    # transport-envelope level only. Failures while parsing the pgoutput plugin
    # payload itself belong to the parser layer and are not reported with this
    # class.
    #
    # @api public
    class ProtocolError < Error
    end

    # Signals a failed operation on the PostgreSQL replication connection.
    #
    # `Connection` turns `PG::Error` instances into this error, so public
    # callers can handle transport-level failures without depending on the
    # PostgreSQL driver's exception classes.
    #
    # @api public
    class ConnectionError < Error
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
module Pgoutput
  module Client
    FeedbackData = Data.define(:received_lsn, :flushed_lsn, :applied_lsn, :client_clock, :reply_requested)

    # Standby status update sent from the client to PostgreSQL.
    #
    # A logical replication client periodically reports which WAL location it
    # has received, flushed, and applied. `Feedback` is the value object for one
    # such report and knows how to serialize itself into the CopyData payload
    # used by the replication protocol.
    #
    # @attr_reader received_lsn [Integer] latest WAL location received by the client
    # @attr_reader flushed_lsn [Integer] latest WAL location flushed by the client
    # @attr_reader applied_lsn [Integer] latest WAL location applied by the client
    # @attr_reader client_clock [Integer] PostgreSQL timestamp in microseconds since 2000-01-01 UTC
    # @attr_reader reply_requested [Boolean] whether this feedback is responding to an immediate-reply request
    class Feedback < FeedbackData
      # Create feedback stamped with the current wall-clock time.
      #
      # Unless given explicitly, the flushed LSN defaults to the received LSN
      # and the applied LSN defaults to the flushed LSN. Pass lower values to
      # report durable flush or apply progress separately from receipt.
      #
      # @param received_lsn [Integer] latest WAL location received by the client
      # @param flushed_lsn [Integer] latest WAL location flushed by the client
      # @param applied_lsn [Integer] latest WAL location applied by the client
      # @param reply_requested [Boolean] whether this is an immediate reply
      # @return [Feedback] immutable feedback value
      def self.now(received_lsn:, flushed_lsn: received_lsn, applied_lsn: flushed_lsn, reply_requested: false)
        new(
          received_lsn:,
          flushed_lsn:,
          applied_lsn:,
          client_clock: current_pg_time,
          reply_requested:
        )
      end

      # Serialize this feedback as a standby status update CopyData payload.
      #
      # Layout: the tag byte `r`, three big-endian unsigned 64-bit LSN fields
      # (received, flushed, applied), the big-endian 64-bit client clock, and a
      # single reply-requested byte (1 or 0).
      #
      # @return [String] frozen binary CopyData payload
      def to_copy_data
        reply_flag = reply_requested ? 1 : 0
        fields = ["r", received_lsn, flushed_lsn, applied_lsn, client_clock, reply_flag]

        fields.pack("aQ>Q>Q>Q>C").freeze
      end

      # Current time expressed as a PostgreSQL protocol timestamp.
      #
      # Replication messages count microseconds from
      # 2000-01-01 00:00:00 UTC rather than from the Unix epoch.
      #
      # @return [Integer] microseconds since 2000-01-01 UTC
      def self.current_pg_time
        postgres_epoch = Time.utc(2000, 1, 1)
        seconds_since_epoch = Time.now.utc - postgres_epoch

        (seconds_since_epoch * 1_000_000).to_i
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module Pgoutput
  module Client
    KeepaliveData = Data.define(:wal_end, :server_clock, :reply_requested)

    # Immutable primary keepalive replication message.
    #
    # While a replication stream is active PostgreSQL sends keepalive CopyData
    # payloads with this layout:
    #
    # ```text
    # Byte 0      : message tag `k`
    # Bytes 1-8   : current server WAL end, unsigned 64-bit big-endian
    # Bytes 9-16  : server clock, PostgreSQL timestamp format
    # Byte 17     : reply-requested flag, 1 for immediate feedback
    # ```
    #
    # The stream layer reads it to advance its known WAL position and to decide
    # whether standby status feedback must be sent right away.
    #
    # @attr_reader wal_end [Integer] latest server WAL position
    # @attr_reader server_clock [Integer] PostgreSQL server timestamp in
    #   microseconds since 2000-01-01 UTC
    # @attr_reader reply_requested [Boolean] whether PostgreSQL requested
    #   immediate feedback
    class Keepalive < KeepaliveData
      # Decode a keepalive CopyData payload.
      #
      # Checks run in a fixed order: empty payload, wrong tag, then length.
      #
      # @param bytes [String] raw CopyData payload beginning with `k`
      # @return [Keepalive] immutable parsed keepalive message
      # @raise [ProtocolError] if the payload is empty, has the wrong message
      #   tag, or is too short to contain the required fields
      def self.parse(bytes)
        payload = bytes.b
        raise ProtocolError, "empty CopyData payload" if payload.empty?

        tag = payload.getbyte(0)
        raise ProtocolError, "expected keepalive message" if tag != "k".ord
        raise ProtocolError, "truncated keepalive message" unless payload.bytesize >= 18

        message = new(
          wal_end: unpack_u64(payload, 1),
          server_clock: unpack_u64(payload, 9),
          reply_requested: payload.getbyte(17) == 1
        )
        Ractor.make_shareable(message)
      end

      # Latest server WAL position formatted as a PostgreSQL LSN string.
      #
      # @return [String]
      def wal_end_lsn
        LSN.format(wal_end)
      end

      # Read one big-endian unsigned 64-bit integer at a byte offset.
      #
      # @param binary [String]
      # @param offset [Integer]
      # @return [Integer]
      def self.unpack_u64(binary, offset)
        chunk = binary.byteslice(offset, 8)
        decoded = chunk.unpack1("Q>") unless chunk.nil?
        return decoded if decoded.is_a?(Integer)

        raise ProtocolError, "failed to unpack uint64"
      end
      private_class_method :unpack_u64
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module Pgoutput
  module Client
    # PostgreSQL Log Sequence Number conversion helpers.
    #
    # PostgreSQL represents LSNs as two hexadecimal 32-bit halves separated by a
    # slash, such as `0/16B6C50`. The replication protocol transmits the same WAL
    # position as an unsigned 64-bit integer. This module converts between those
    # two representations.
    #
    # @example Parse a textual LSN
    #   Pgoutput::Client::LSN.parse("0/10")
    #   # => 16
    #
    # @example Format an integer WAL position
    #   Pgoutput::Client::LSN.format(16)
    #   # => "0/10"
    #
    # @api public
    module LSN
      # Strict textual form: two halves of 1-8 hexadecimal digits each. Using a
      # pattern instead of `Integer(str, 16)` rejects signs, `0x` prefixes,
      # underscores, surrounding whitespace, and halves wider than 32 bits,
      # all of which `Integer()` would silently accept.
      TEXT_PATTERN = %r{\A(\h{1,8})/(\h{1,8})\z}

      # Largest WAL position representable as an unsigned 64-bit integer.
      MAX_VALUE = 0xFFFF_FFFF_FFFF_FFFF

      private_constant :TEXT_PATTERN, :MAX_VALUE

      module_function

      # Parse a PostgreSQL LSN string into an integer WAL position.
      #
      # Upper- and lowercase hexadecimal digits are both accepted.
      #
      # @param value [#to_s] LSN string in `HEX/HEX` form
      # @return [Integer] unsigned 64-bit WAL position
      # @raise [ArgumentError] if the value is not a valid LSN string
      def parse(value)
        match = TEXT_PATTERN.match(String(value))
        raise ArgumentError, "invalid LSN: #{value.inspect}" unless match

        (match[1].hex << 32) | match[2].hex
      end

      # Format an integer WAL position as a PostgreSQL LSN string.
      #
      # @param value [#to_int, #to_s] non-negative integer WAL position
      # @return [String] LSN string in uppercase hexadecimal `HEX/HEX` form
      # @raise [ArgumentError] if the value is negative, does not fit in an
      #   unsigned 64-bit integer, or cannot be coerced to an integer
      def format(value)
        integer = begin
          Integer(value)
        rescue TypeError
          # `Integer(nil)` and similar raise TypeError; report every coercion
          # failure with the documented exception class.
          raise ArgumentError, "invalid LSN: #{value.inspect}"
        end
        raise ArgumentError, "LSN must be non-negative" if integer.negative?
        raise ArgumentError, "LSN must fit in an unsigned 64-bit integer" if integer > MAX_VALUE

        high = integer >> 32
        low = integer & 0xFFFF_FFFF
        "#{high.to_s(16).upcase}/#{low.to_s(16).upcase}"
      end
    end
  end
end