convert_sdk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +191 -0
- data/.yardopts +16 -0
- data/CONTRIBUTING.md +131 -0
- data/LICENSE +201 -0
- data/README.md +183 -0
- data/RELEASE.md +313 -0
- data/Rakefile +16 -0
- data/convert_sdk.gemspec +50 -0
- data/lib/convert_sdk/api_manager.rb +288 -0
- data/lib/convert_sdk/background_timer.rb +129 -0
- data/lib/convert_sdk/bucketed_feature.rb +35 -0
- data/lib/convert_sdk/bucketed_variation.rb +43 -0
- data/lib/convert_sdk/bucketing_manager.rb +134 -0
- data/lib/convert_sdk/client.rb +417 -0
- data/lib/convert_sdk/comparisons.rb +257 -0
- data/lib/convert_sdk/config.rb +214 -0
- data/lib/convert_sdk/config_validator.rb +127 -0
- data/lib/convert_sdk/context.rb +618 -0
- data/lib/convert_sdk/data_manager.rb +897 -0
- data/lib/convert_sdk/data_store_manager.rb +185 -0
- data/lib/convert_sdk/enums/bucketing_error.rb +18 -0
- data/lib/convert_sdk/enums/feature_status.rb +13 -0
- data/lib/convert_sdk/enums/goal_data_key.rb +62 -0
- data/lib/convert_sdk/enums/log_level.rb +22 -0
- data/lib/convert_sdk/enums/rule_error.rb +19 -0
- data/lib/convert_sdk/enums/system_events.rb +29 -0
- data/lib/convert_sdk/event_manager.rb +125 -0
- data/lib/convert_sdk/experience_manager.rb +69 -0
- data/lib/convert_sdk/feature_manager.rb +367 -0
- data/lib/convert_sdk/fork_guard.rb +144 -0
- data/lib/convert_sdk/http_client.rb +198 -0
- data/lib/convert_sdk/log_manager.rb +168 -0
- data/lib/convert_sdk/murmur_hash3.rb +129 -0
- data/lib/convert_sdk/redactor.rb +93 -0
- data/lib/convert_sdk/rule_manager.rb +242 -0
- data/lib/convert_sdk/segments_manager.rb +241 -0
- data/lib/convert_sdk/sentinel.rb +57 -0
- data/lib/convert_sdk/stores/memory_store.rb +55 -0
- data/lib/convert_sdk/stores/redis_store.rb +126 -0
- data/lib/convert_sdk/version.rb +14 -0
- data/lib/convert_sdk/visitors_queue.rb +190 -0
- data/lib/convert_sdk.rb +218 -0
- data/scripts/check-generated-rbs-header.sh +41 -0
- data/steep/config_contract_probe.rb +154 -0
- metadata +93 -0
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "json"
|
|
6
|
+
require "openssl"
|
|
7
|
+
|
|
8
|
+
module ConvertSdk
|
|
9
|
+
# The single hardened HTTP port every SDK request flows through.
|
|
10
|
+
#
|
|
11
|
+
# +HttpClient+ is the *only* file in the gem that touches +Net::HTTP+ (a cheap
|
|
12
|
+
# architectural regression test asserts this). Every request it sends carries
|
|
13
|
+
# the ConvertAgent wire invariant and bounded timeouts, and every failure it
|
|
14
|
+
# encounters is converted into a failed {Response} rather than raised — the
|
|
15
|
+
# port NEVER raises to callers, so the config fetch (Story 2.5) and event
|
|
16
|
+
# delivery (Story 4.1) consumers degrade gracefully on a failed response.
|
|
17
|
+
#
|
|
18
|
+
# == The ConvertAgent wire invariant
|
|
19
|
+
#
|
|
20
|
+
# The metrics endpoint's bot filter silently DROPS server-side events whose
|
|
21
|
+
# +User-Agent+ is not +ConvertAgent/1.0+. The header is therefore applied
|
|
22
|
+
# LAST, after every header merge, so it cannot be overridden by an
|
|
23
|
+
# integrator-supplied +User-Agent+. Without it, tracking events would vanish
|
|
24
|
+
# silently in production. (JS/PHP precedent: set unconditionally after merge.)
|
|
25
|
+
#
|
|
26
|
+
# == Bounded timeouts
|
|
27
|
+
#
|
|
28
|
+
# Both +open_timeout+ and +read_timeout+ are set explicitly on EVERY request
|
|
29
|
+
# (a deliberate improvement over the JS SDK, which sets none). The SDK can
|
|
30
|
+
# never hang a host thread waiting on a slow or dead endpoint.
|
|
31
|
+
#
|
|
32
|
+
# == TLS / Bearer / proxies
|
|
33
|
+
#
|
|
34
|
+
# HTTPS endpoints use TLS with verification ON (+verify_mode+ is never
|
|
35
|
+
# +VERIFY_NONE+). An +Authorization: Bearer ...+ header is stripped (and a
|
|
36
|
+
# warning logged) on any non-HTTPS endpoint so the SDK key secret never
|
|
37
|
+
# crosses the wire in plaintext. Proxies are honoured through the standard
|
|
38
|
+
# +Net::HTTP+ environment conventions (+http_proxy+/+https_proxy+/+no_proxy+).
|
|
39
|
+
#
|
|
40
|
+
# == JSON boundary
|
|
41
|
+
#
|
|
42
|
+
# Callers pass and receive Ruby hashes; JSON encode/decode happens only here.
|
|
43
|
+
# A request +body+ hash is rendered with +JSON.generate+; a response body is
|
|
44
|
+
# parsed with +JSON.parse+ (string keys). A parse failure is logged and yields
|
|
45
|
+
# +body: nil+ on an otherwise intact response.
|
|
46
|
+
#
|
|
47
|
+
# All logging goes through the injected {LogManager} (never +puts+), so the
|
|
48
|
+
# {Redactor} masks secrets and strips URL query strings from every line.
|
|
49
|
+
class HttpClient
  # The mandatory wire User-Agent. Applied LAST so it is unoverridable.
  USER_AGENT = "ConvertAgent/1.0"

  # The status used for a failed Response when no HTTP response was received
  # (network error / timeout). Callers MUST use {Response#success?}, never
  # compare the status integer, for error detection.
  FAILURE_STATUS = 0

  # An immutable result of a single HTTP request.
  #
  # +status+ is the HTTP status integer (or {FAILURE_STATUS} on a transport
  # failure); +body+ is the parsed JSON value (or nil); +headers+ is the
  # response header hash; +#success?+ is a strict 2xx predicate.
  #
  # Declared as an explicit +class < Struct.new(...)+ subclass (NOT the
  # +Struct.new do...end+ block form) so that Steep can statically resolve
  # +#success?+ and the keyword constructor.
  class Response < Struct.new(:status, :body, :headers, keyword_init: true)
    # Build the struct from keyword attributes, then freeze it so the value
    # object is immutable.
    #
    # @param attributes [Hash] +status:+, +body:+ and +headers:+ keywords.
    # @return [void]
    def initialize(**attributes)
      super(**attributes)
      freeze
    end

    # @return [Boolean] true iff +status+ is in the 2xx range.
    def success?
      status.between?(200, 299)
    end
  end

  # @param log_manager [LogManager] the injected logging surface; every
  #   message this class emits goes through it.
  # @param open_timeout [Numeric] connection-establishment timeout (seconds).
  # @param read_timeout [Numeric] response-read timeout (seconds).
  def initialize(log_manager:, open_timeout:, read_timeout:)
    @log_manager = log_manager
    @open_timeout = open_timeout
    @read_timeout = read_timeout
  end

  # Send one HTTP request and return a frozen {Response}. Any StandardError
  # raised while parsing the URL, building the request or talking to the
  # network is logged and converted into a failed response.
  #
  # @param method [Symbol] +:get+ / +:post+ / etc.
  # @param url [String] the absolute request URL.
  # @param headers [Hash{String=>String}, nil] caller headers (merged before
  #   the wire invariant is applied last). nil is treated as no headers.
  # @param body [Hash, nil] a request body; JSON-encoded if present.
  # @return [Response] frozen; +success?+ is the only valid error check.
  def request(method:, url:, headers: {}, body: nil)
    uri = URI.parse(url)
    https = uri.scheme == "https"
    wire_headers = build_headers(headers, https)
    @log_manager.debug("HttpClient#request: #{method.to_s.upcase} #{url}")

    perform(method, uri, https, wire_headers, body)
  rescue StandardError => e
    @log_manager.error("HttpClient#request: request failed (#{e.class}: #{e.message})")
    failed_response
  end

  private

  # Open a per-call connection (no reuse), set explicit timeouts, send the
  # request, and map the result to a frozen {Response}.
  #
  # @raise [ArgumentError] when the URL carries no usable host.
  def perform(method, uri, https, wire_headers, body)
    # URI#hostname strips the brackets from an IPv6 literal ("[::1]" -> "::1"),
    # which is the form Net::HTTP expects as its address argument.
    host = uri.hostname
    # Reject an empty host as well as a missing one, so a URL such as
    # "http:///path" fails here instead of reaching the socket layer.
    raise ArgumentError, "URL has no host: #{uri}" if host.nil? || host.empty?

    Net::HTTP.start(
      host, uri.port,
      use_ssl: https,
      open_timeout: @open_timeout,
      read_timeout: @read_timeout
    ) do |http|
      http.open_timeout = @open_timeout
      http.read_timeout = @read_timeout
      net_response = http.request(build_request(method, uri, wire_headers, body))
      build_response(net_response)
    end
  end

  # Build the wire headers: caller headers first, then the ConvertAgent UA
  # applied LAST (unoverridable), then the Bearer guard for non-HTTPS.
  #
  # Header names are case-insensitive on the wire, so any caller-supplied
  # spelling of +User-Agent+ is dropped before the mandatory value is set; the
  # result therefore contains exactly one User-Agent entry.
  #
  # @param headers [Hash, nil] caller headers; keys are stringified.
  # @param https [Boolean] whether the target endpoint is HTTPS.
  # @return [Hash{String=>String}] the headers to put on the wire.
  def build_headers(headers, https)
    merged = {} #: Hash[String, String]
    (headers || {}).each do |key, value|
      name = key.to_s
      merged[name] = value unless name.casecmp?("User-Agent")
    end
    merged["User-Agent"] = USER_AGENT
    guard_bearer(merged, https)
    merged
  end

  # Strip the Authorization header on a non-HTTPS endpoint (and warn): the SDK
  # key secret must never cross the wire in plaintext. The match is
  # case-insensitive because HTTP header names are, so "authorization" and
  # "AUTHORIZATION" are removed as well.
  #
  # @param headers [Hash{String=>String}] mutated in place.
  # @param https [Boolean] whether the target endpoint is HTTPS.
  # @return [void]
  def guard_bearer(headers, https)
    return if https

    exposed = headers.keys.select { |name| name.casecmp?("Authorization") }
    return if exposed.empty?

    exposed.each { |name| headers.delete(name) }
    @log_manager.warn("HttpClient#request: stripped Authorization on non-HTTPS endpoint")
  end

  # Construct the Net::HTTP request object for +method+, attaching the JSON
  # body (if any) and all wire headers.
  #
  # @raise [NameError] when +method+ does not name a Net::HTTP request class.
  def build_request(method, uri, wire_headers, body)
    # :get -> Net::HTTP::Get, :post -> Net::HTTP::Post, ...
    request_class = Net::HTTP.const_get(method.to_s.capitalize)
    net_request = request_class.new(uri)
    wire_headers.each { |key, value| net_request[key] = value }
    if body
      # Respect a caller-chosen Content-Type; default to JSON otherwise.
      net_request["Content-Type"] ||= "application/json"
      net_request.body = JSON.generate(body)
    end
    net_request
  end

  # Map a Net::HTTPResponse to a frozen {Response}, parsing the JSON body.
  def build_response(net_response)
    Response.new(
      status: net_response.code.to_i,
      body: parse_body(net_response.body),
      headers: flatten_headers(net_response)
    )
  end

  # Parse a response body as JSON (string keys). A nil/empty body or a parse
  # failure yields nil; a parse failure is additionally logged at WARN.
  #
  # @param raw [String, nil] the raw response body.
  # @return [Object, nil] the decoded JSON value, or nil.
  def parse_body(raw)
    return nil if raw.nil? || raw.empty?

    JSON.parse(raw)
  rescue JSON::ParserError => e
    @log_manager.warn("HttpClient#request: response body is not JSON (#{e.class})")
    nil
  end

  # Copy the response headers yielded by +each_header+ into a plain Hash.
  def flatten_headers(net_response)
    headers = {} #: Hash[String, String]
    net_response.each_header { |key, value| headers[key] = value }
    headers
  end

  # A failed Response with no HTTP status. Frozen, like every Response.
  def failed_response
    Response.new(status: FAILURE_STATUS, body: nil, headers: {}) #: Response
  end
end
|
|
198
|
+
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ConvertSdk
|
|
4
|
+
# Multi-sink, level-gated logger with secret redaction wired in by
|
|
5
|
+
# construction.
|
|
6
|
+
#
|
|
7
|
+
# +LogManager+ is consumed by every manager from the HTTP client (Story 1.5)
|
|
8
|
+
# onward. It fans messages out to any number of stdlib-+Logger+-compatible
|
|
9
|
+
# *sinks* and guarantees, structurally, that no message reaches a sink
|
|
10
|
+
# without first passing through the {Redactor}: every public level method
|
|
11
|
+
# funnels through the single private +#emit+ path, and that path applies the
|
|
12
|
+
# +loggable+ conversion boundary and redaction before touching a sink. There
|
|
13
|
+
# is no public method that bypasses +#emit+.
|
|
14
|
+
#
|
|
15
|
+
# == Levels
|
|
16
|
+
#
|
|
17
|
+
# Verbosity is gated by the JS-parity {LogLevel} values (TRACE=0 … SILENT=5).
|
|
18
|
+
# A call at level +L+ emits only when +L >= configured_level+; +SILENT+
|
|
19
|
+
# suppresses everything. The stdlib +Logger+ has no +trace+, so both
|
|
20
|
+
# {#trace} and {#debug} dispatch to the sink's +#debug+ — the numeric level
|
|
21
|
+
# value (0 vs 1), not the sink method, decides whether they emit.
|
|
22
|
+
#
|
|
23
|
+
# Level conventions (callers choose the level by intent):
|
|
24
|
+
#
|
|
25
|
+
# * +trace+ / +debug+ — decisioning internals (bucketing, rule evaluation).
|
|
26
|
+
# * +info+ — lifecycle events (SDK ready, config refreshed).
|
|
27
|
+
# * +warn+ — recoverable conditions (stale config, retry).
|
|
28
|
+
# * +error+ — internal failures (parse error, exhausted retries).
|
|
29
|
+
#
|
|
30
|
+
# == Message format
|
|
31
|
+
#
|
|
32
|
+
# Callers pass messages already formatted as <tt>{ClassName}#{method}:
|
|
33
|
+
# {message}</tt>. +LogManager+ does not prepend the class name itself — the
|
|
34
|
+
# format is a usage convention, documented here and enforced at call sites.
|
|
35
|
+
#
|
|
36
|
+
# == Thread safety
|
|
37
|
+
#
|
|
38
|
+
# The sink list is guarded by +@sinks_mutex+. Compound operations on the list
|
|
39
|
+
# happen inside the lock; the (potentially slow, potentially raising) sink
|
|
40
|
+
# I/O happens outside the lock by iterating a +dup+ snapshot. A sink that
|
|
41
|
+
# raises is contained (rescue +StandardError+) so a broken sink never crashes
|
|
42
|
+
# the host or starves the other sinks.
|
|
43
|
+
class LogManager
  # The methods every valid sink must respond to (stdlib +Logger+ contract).
  REQUIRED_SINK_METHODS = %i[debug info warn error].freeze

  # @param level [Integer] a {LogLevel} threshold; messages below it are
  #   suppressed. Defaults to ERROR.
  # @param sink [Object, nil] an optional initial sink (anything responding
  #   to debug/info/warn/error). An invalid sink is rejected, not raised.
  # @param secrets [Array<String>] secret values handed to the {Redactor}.
  #   More can be added later via {#register_secret}.
  def initialize(level: LogLevel::ERROR, sink: nil, secrets: [])
    @level = level
    @redactor = Redactor.new(secrets)
    # The sink list is only read or written while holding @sinks_mutex.
    @sinks = []
    @sinks_mutex = Thread::Mutex.new
    add_sink(sink) unless sink.nil?
  end

  # Register a sink if it responds to every method in
  # {REQUIRED_SINK_METHODS}. A candidate that does not is reported through
  # the normal ERROR path and left unregistered; nothing is raised.
  #
  # @param sink [Object] the candidate sink.
  # @return [self] for chaining, whether or not the sink was accepted.
  def add_sink(sink)
    unless REQUIRED_SINK_METHODS.all? { |name| sink.respond_to?(name) }
      emit(LogLevel::ERROR, "LogManager#add_sink: rejected sink #{sink.class} " \
                            "(must respond to #{REQUIRED_SINK_METHODS.join("/")})")
      return self
    end

    @sinks_mutex.synchronize { @sinks << sink }
    self
  end

  # Hand an additional secret to the redactor (e.g. once the SDK key is
  # known). Delegates directly to {Redactor#register_secret}.
  #
  # @param secret [String, nil]
  # @return [void]
  def register_secret(secret)
    @redactor.register_secret(secret)
  end

  # Log at TRACE. Delivered through the sink's +#debug+ method.
  #
  # @param message [String] the already-formatted message.
  # @return [void]
  def trace(message)
    emit(LogLevel::TRACE, message)
  end

  # Log at DEBUG. Delivered through the sink's +#debug+ method.
  #
  # @param message [String] the already-formatted message.
  # @return [void]
  def debug(message)
    emit(LogLevel::DEBUG, message)
  end

  # Log at INFO.
  #
  # @param message [String] the already-formatted message.
  # @return [void]
  def info(message)
    emit(LogLevel::INFO, message)
  end

  # Log at WARN.
  #
  # @param message [String] the already-formatted message.
  # @return [void]
  def warn(message)
    emit(LogLevel::WARN, message)
  end

  # Log at ERROR.
  #
  # @param message [String] the already-formatted message.
  # @return [void]
  def error(message)
    emit(LogLevel::ERROR, message)
  end

  private

  # The one path by which text reaches a sink: gate on the configured level,
  # stringify via {#loggable}, redact, then deliver to each sink in a
  # snapshot of the list. An exception raised by one sink is swallowed so the
  # remaining sinks are still served and the caller never sees it.
  def emit(level, message)
    return unless level >= @level

    redacted = @redactor.redact(loggable(message))
    target = sink_method_for(level)
    each_sink do |sink|
      begin
        sink.public_send(target, redacted)
      rescue StandardError
        # Deliberately ignored: a broken sink must not affect the host.
        next
      end
    end
  end

  # Turn +message+ into a String before redaction: Strings are returned
  # as-is, anything else is rendered with +#inspect+.
  def loggable(message)
    message.is_a?(String) ? message : message.inspect
  end

  # Choose the sink method for a {LogLevel} value. TRACE and DEBUG share
  # +:debug+; INFO and WARN map to their namesakes; anything else is +:error+.
  def sink_method_for(level)
    return :debug if [LogLevel::TRACE, LogLevel::DEBUG].include?(level)
    return :info if LogLevel::INFO == level
    return :warn if LogLevel::WARN == level

    :error
  end

  # Yield each sink from a copy of the list. The copy is made while holding
  # the mutex; the block runs after the mutex has been released.
  def each_sink(&block)
    @sinks_mutex.synchronize { @sinks.dup }.each(&block)
  end
end
|
|
168
|
+
end
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ConvertSdk
|
|
4
|
+
# Vendored pure-Ruby MurmurHash3 (x86 32-bit variant).
|
|
5
|
+
#
|
|
6
|
+
# This is the cross-SDK hashing cornerstone: bucketing computes
|
|
7
|
+
# +MurmurHash3.hash(experienceId + visitorId, 9999)+ and MUST produce a
|
|
8
|
+
# byte-identical result to every other Convert SDK (JS, PHP), or a visitor
|
|
9
|
+
# would bucket into a different variation on Ruby than on web. The 75-vector
|
|
10
|
+
# parity suite (+spec/cross_sdk/hash_vectors_spec.rb+) is the proof.
|
|
11
|
+
#
|
|
12
|
+
# Implemented as pure Ruby with explicit 32-bit masking — no C extension and
|
|
13
|
+
# no gemspec dependency, so it is JRuby-compatible by construction. Ruby
|
|
14
|
+
# integers are arbitrary-precision; the +& MASK_32+ on every arithmetic step
|
|
15
|
+
# is the correctness boundary that emulates 32-bit unsigned overflow.
|
|
16
|
+
#
|
|
17
|
+
# Reference: Austin Appleby's MurmurHash3_x86_32 (public domain).
|
|
18
|
+
# https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
|
|
19
|
+
#
|
|
20
|
+
# @api private
|
|
21
|
+
module MurmurHash3
  # 32-bit overflow mask; ANDed in after multiplies, adds and rotates so that
  # Ruby's arbitrary-precision integers wrap like unsigned 32-bit values.
  MASK_32 = 0xFFFFFFFF

  # First multiplier of the k1 scramble.
  C1 = 0xcc9e2d51
  # Second multiplier of the k1 scramble.
  C2 = 0x1b873593
  # Multiplier of the per-block h1 mix: rotl(h1, R2) * M + N.
  M = 5
  # Addend of the per-block h1 mix: rotl(h1, R2) * M + N.
  N = 0xe6546b64
  # Rotate-left amount applied to k1 between its two multiplies.
  R1 = 15
  # Rotate-left amount applied to h1 in the per-block mix.
  R2 = 13

  # First multiplier of the fmix32 finalizer.
  FMIX_C1 = 0x85ebca6b
  # Second multiplier of the fmix32 finalizer.
  FMIX_C2 = 0xc2b2ae35

  class << self
    # Compute the MurmurHash3 x86 32-bit hash of +key+ with +seed+.
    #
    # @param key [String] the key; hashed over its raw byte sequence.
    # @param seed [Integer] the seed; only its low 32 bits are used.
    # @return [Integer] unsigned 32-bit hash value in the range 0..0xFFFFFFFF.
    def hash(key, seed)
      bytes = key.b # binary copy, so multi-byte characters contribute every byte
      size = bytes.bytesize

      state = mix_body(bytes, size, seed & MASK_32)
      state = mix_tail(bytes, size, state)

      # Fold the byte length in, then avalanche.
      fmix32(state ^ size)
    end

    private

    # Fold every complete 4-byte little-endian block into the running state.
    def mix_body(data, length, h1)
      full_bytes = length - (length % 4)
      (0...full_bytes).step(4).reduce(h1) do |state, offset|
        mixed = state ^ mix_k1(read_u32_le(data, offset))
        ((rotl32(mixed, R2) * M) + N) & MASK_32
      end
    end

    # Fold the 1..3 bytes left after the last complete block (if any) into
    # the state; the first leftover byte becomes the lowest-order byte.
    def mix_tail(data, length, h1)
      leftover = length % 4
      return h1 if leftover.zero?

      base = length - leftover
      k1 = leftover.times.reduce(0) do |word, offset|
        word | (byte_at(data, base + offset) << (8 * offset))
      end
      h1 ^ mix_k1(k1)
    end

    # The k1 scramble shared by body and tail: * C1, rotl R1, * C2.
    def mix_k1(k1)
      scrambled = rotl32((k1 * C1) & MASK_32, R1)
      (scrambled * C2) & MASK_32
    end

    # Assemble the 32-bit little-endian word starting at byte +index+.
    def read_u32_le(data, index)
      4.times.reduce(0) do |word, offset|
        word | (byte_at(data, index + offset) << (8 * offset))
      end
    end

    # Fetch one byte as an Integer. +Integer(nil)+ raises, so an
    # out-of-range index fails loudly instead of propagating nil.
    def byte_at(data, index)
      Integer(data.getbyte(index))
    end

    # Rotate +value+ left by +shift+ bits within 32 bits.
    def rotl32(value, shift)
      word = value & MASK_32
      high = (word << shift) & MASK_32
      low = word >> (32 - shift)
      high | low
    end

    # The fmix32 avalanche finalizer.
    def fmix32(hash)
      mixed = hash & MASK_32
      mixed = ((mixed ^ (mixed >> 16)) * FMIX_C1) & MASK_32
      mixed = ((mixed ^ (mixed >> 13)) * FMIX_C2) & MASK_32
      mixed ^ (mixed >> 16)
    end
  end
end
|
|
129
|
+
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ConvertSdk
|
|
4
|
+
# Masks secrets and strips URL query strings out of log messages.
|
|
5
|
+
#
|
|
6
|
+
# +Redactor+ is the structural guarantee behind security NFR5: it is wired
|
|
7
|
+
# *inside* {LogManager} so that no call path can emit a message without first
|
|
8
|
+
# passing through {#redact}. Redaction is by construction, not by discipline.
|
|
9
|
+
#
|
|
10
|
+
# Two transforms are applied to every message:
|
|
11
|
+
#
|
|
12
|
+
# * *Secret masking* — each known secret (e.g. an +sdk_key+ /
|
|
13
|
+
# +sdk_key_secret+ value) is replaced wherever it occurs with its first
|
|
14
|
+
# four characters followed by a single-character ellipsis (+abcd…+).
|
|
15
|
+
# Secrets shorter than four characters are replaced entirely (+…+).
|
|
16
|
+
# * *URL query stripping* — any +http(s)+ URL has its +?query=string+ removed
|
|
17
|
+
# (+https://host/path?x=1+ becomes +https://host/path+), since query
|
|
18
|
+
# strings frequently carry tokens.
|
|
19
|
+
#
|
|
20
|
+
# Secrets become known at different times: some at construction, some at
|
|
21
|
+
# +ConvertSdk.create+ time. {#register_secret} allows late registration so
|
|
22
|
+
# the same redactor instance can be wired before all secrets are known.
|
|
23
|
+
#
|
|
24
|
+
# Redaction operates on *strings* — structured objects must already have
|
|
25
|
+
# passed the +loggable+ conversion boundary (see {LogManager}) before
|
|
26
|
+
# reaching here.
|
|
27
|
+
class Redactor
  # Number of leading characters kept unmasked for a secret long enough to
  # retain a prefix.
  MASK_PREFIX_LENGTH = 4
  # The single-character ellipsis appended after the unmasked prefix (or used
  # as the whole replacement for short secrets).
  MASK_GLYPH = "…"
  # Matches an +http(s)+ URL that carries a query string: group 1 is the URL
  # up to (not including) the first +?+; the +?+ and everything up to the next
  # whitespace is matched outside the group and therefore dropped.
  URL_QUERY_PATTERN = %r{(https?://\S*?)\?\S*}

  # @param secrets [Array<String, nil>] secret values to mask. nil/blank
  #   entries are ignored.
  def initialize(secrets = [])
    @secrets = []
    Array(secrets).each { |secret| register_secret(secret) }
  end

  # Register an additional secret to mask. Safe to call after construction.
  # The value is stringified; nil, empty and whitespace-only values are
  # ignored, as are values that are already registered.
  #
  # @param secret [String, nil] the secret value.
  # @return [void]
  def register_secret(secret)
    return if secret.nil?

    value = secret.to_s
    return if value.strip.empty?

    @secrets << value unless @secrets.include?(value)
  end

  # Apply URL query stripping, then secret masking, to +message+.
  #
  # @param message [String] the message to redact (stringified with +to_s+).
  # @return [String] the redacted message (a new string; +message+ is not
  #   mutated).
  def redact(message)
    mask_secrets(strip_url_queries(message.to_s))
  end

  private

  # Replace every occurrence of every known secret with its masked form,
  # longest secret first.
  #
  # The replacement is supplied through gsub's BLOCK form on purpose: a
  # replacement passed as a String argument has its backslash sequences
  # interpreted (+\0+ expands to the whole match), so a secret beginning with
  # a backslash-digit pair would be substituted by itself and written out in
  # full. A block's return value is inserted literally.
  def mask_secrets(message)
    @secrets.sort_by { |secret| -secret.length }.reduce(message) do |text, secret|
      text.gsub(secret) { masked(secret) }
    end
  end

  # @return [String] the first {MASK_PREFIX_LENGTH} characters followed by
  #   {MASK_GLYPH} for secrets of at least that length; {MASK_GLYPH} alone
  #   for shorter ones.
  def masked(secret)
    return MASK_GLYPH if secret.length < MASK_PREFIX_LENGTH

    "#{secret[0, MASK_PREFIX_LENGTH]}#{MASK_GLYPH}"
  end

  # Drop the query string from any URL in the message, keeping the part
  # before the +?+. The +\1+ backreference here is intentional.
  def strip_url_queries(message)
    message.gsub(URL_QUERY_PATTERN, '\1')
  end
end
|
|
93
|
+
end
|