browserctl 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +45 -0
  3. data/README.md +1 -1
  4. data/bin/browserctl +45 -4
  5. data/lib/browserctl/client.rb +47 -3
  6. data/lib/browserctl/commands/cli_output.rb +16 -3
  7. data/lib/browserctl/commands/flow.rb +123 -0
  8. data/lib/browserctl/commands/state.rb +193 -0
  9. data/lib/browserctl/commands/workflow.rb +62 -4
  10. data/lib/browserctl/constants.rb +1 -1
  11. data/lib/browserctl/detectors/auth_required.rb +128 -0
  12. data/lib/browserctl/detectors.rb +2 -0
  13. data/lib/browserctl/errors.rb +36 -0
  14. data/lib/browserctl/flow.rb +215 -0
  15. data/lib/browserctl/flow_registry.rb +66 -0
  16. data/lib/browserctl/flows/stdlib/basic_auth.rb +30 -0
  17. data/lib/browserctl/flows/stdlib/cloudflare_solve.rb +59 -0
  18. data/lib/browserctl/flows/stdlib/magic_link_email.rb +28 -0
  19. data/lib/browserctl/flows/stdlib/oauth_github.rb +28 -0
  20. data/lib/browserctl/flows/stdlib/oauth_google.rb +30 -0
  21. data/lib/browserctl/flows/stdlib/totp_2fa.rb +61 -0
  22. data/lib/browserctl/recording.rb +212 -26
  23. data/lib/browserctl/replay/context.rb +40 -0
  24. data/lib/browserctl/replay/fingerprint_matcher.rb +86 -0
  25. data/lib/browserctl/replay/snapshot_diff.rb +51 -0
  26. data/lib/browserctl/replay/telemetry.rb +60 -0
  27. data/lib/browserctl/runner.rb +38 -4
  28. data/lib/browserctl/server/command_dispatcher.rb +10 -1
  29. data/lib/browserctl/server/handlers/interaction.rb +3 -3
  30. data/lib/browserctl/server/handlers/navigation.rb +33 -4
  31. data/lib/browserctl/server/handlers/observation.rb +43 -2
  32. data/lib/browserctl/server/handlers/state.rb +149 -0
  33. data/lib/browserctl/server/page_session.rb +9 -7
  34. data/lib/browserctl/server/snapshot_builder.rb +21 -45
  35. data/lib/browserctl/snapshot/annotator.rb +75 -0
  36. data/lib/browserctl/snapshot/extractor.rb +21 -0
  37. data/lib/browserctl/snapshot/fingerprint.rb +88 -0
  38. data/lib/browserctl/snapshot/ref.rb +70 -0
  39. data/lib/browserctl/snapshot/serializer.rb +17 -0
  40. data/lib/browserctl/state/bundle.rb +242 -0
  41. data/lib/browserctl/state/transport.rb +64 -0
  42. data/lib/browserctl/state/transports/file.rb +35 -0
  43. data/lib/browserctl/state/transports/one_password.rb +67 -0
  44. data/lib/browserctl/state/transports/s3.rb +42 -0
  45. data/lib/browserctl/state.rb +208 -0
  46. data/lib/browserctl/version.rb +1 -1
  47. data/lib/browserctl/workflow/flow_wrapper.rb +81 -0
  48. data/lib/browserctl/workflow/promoter.rb +96 -0
  49. data/lib/browserctl/workflow/promotion_ledger.rb +72 -0
  50. data/lib/browserctl/workflow.rb +180 -16
  51. metadata +32 -2
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../flow"
4
+
5
+ # Clicks the "Authorize <app>" button on a GitHub OAuth consent screen.
6
+ #
7
+ # Assumes the user is already signed in to GitHub and the page is parked
8
+ # on the consent URL — this flow does not handle the credential entry
9
+ # step. Use a separate workflow or flow to land on the consent page first.
10
+ Browserctl.flow("oauth_github") do
11
+ version "1.0.0"
12
+ requires_browserctl "0.11.0"
13
+ desc "Click the Authorize button on a GitHub OAuth consent screen."
14
+
15
+ # The default selector targets the green Authorize submit button on
16
+ # github.com/login/oauth/authorize. GitHub keeps name="authorize" stable
17
+ # across UI revisions; override only if you're testing against a forked
18
+ # GitHub Enterprise instance with a customised template.
19
+ param :authorize_selector, default: 'button[name="authorize"][value="1"]'
20
+
21
+ precondition("on a github oauth consent page") do
22
+ page.url.include?("/login/oauth/authorize")
23
+ end
24
+
25
+ step("click authorize") do
26
+ page.click(authorize_selector)
27
+ end
28
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../flow"
4
+
5
+ # Clicks the Continue / Allow button on a Google OAuth consent screen.
6
+ #
7
+ # Assumes the user is already signed in to Google and the page is parked
8
+ # on accounts.google.com showing the consent prompt. This flow does not
9
+ # pick an account from the chooser, enter a password, or solve 2FA —
10
+ # compose those before calling this flow.
11
+ #
12
+ # Google rotates consent UI more often than GitHub, so the default
13
+ # selector is a best-effort match against the modern Material 3 button.
14
+ # Override if your account or app version sees a different layout.
15
+ Browserctl.flow("oauth_google") do
16
+ version "1.0.0"
17
+ requires_browserctl "0.11.0"
18
+ desc "Click the Continue/Allow button on a Google OAuth consent screen."
19
+
20
+ param :continue_selector, default: 'button[jsname="LgbsSe"]'
21
+
22
+ precondition("on a google oauth consent page") do
23
+ url = page.url
24
+ url.include?("accounts.google.com") && (url.include?("/oauth") || url.include?("/signin/oauth"))
25
+ end
26
+
27
+ step("click continue") do
28
+ page.click(continue_selector)
29
+ end
30
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "openssl"
4
+ require_relative "../../flow"
5
+
6
+ module Browserctl
7
+ module Flows
8
+ # RFC 6238 TOTP code generation from a base32 secret.
9
+ # Pure Ruby; no network and no external gem.
10
+ module TOTP
11
+ module_function
12
+
13
+ def generate(secret, at: Time.now, digits: 6, period: 30, digest: "SHA1")
14
+ counter = (at.to_i / period).to_i
15
+ key = decode_base32(secret)
16
+ counter_b = [counter].pack("Q>") # 64-bit big-endian
17
+ hmac = OpenSSL::HMAC.digest(digest, key, counter_b)
18
+ offset = hmac[-1].ord & 0x0f
19
+ truncated = hmac[offset, 4].unpack1("N") & 0x7fffffff
20
+ truncated.to_s.rjust(digits, "0")[-digits..]
21
+ end
22
+
23
+ BASE32_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
24
+
25
+ def decode_base32(secret)
26
+ cleaned = secret.to_s.upcase.gsub(/[^A-Z2-7]/, "")
27
+ bits = cleaned.each_char.map { |c| char_to_bits(c) }.join
28
+ whole_bytes = bits[0, (bits.length / 8) * 8]
29
+ whole_bytes.scan(/.{8}/).map { |b| b.to_i(2).chr }.join
30
+ end
31
+
32
+ def char_to_bits(char)
33
+ idx = BASE32_ALPHABET.index(char) or
34
+ raise ArgumentError, "invalid base32 char #{char.inspect}"
35
+ idx.to_s(2).rjust(5, "0")
36
+ end
37
+ end
38
+ end
39
+ end
40
+
41
+ Browserctl.flow("totp_2fa") do
42
+ version "1.0.0"
43
+ requires_browserctl "0.11.0"
44
+ desc "Generate an RFC 6238 TOTP code from a base32 secret and type it into the page."
45
+
46
+ param :secret, required: true, secret: true
47
+ param :selector, required: true
48
+ param :digits, default: 6
49
+ param :period, default: 30
50
+
51
+ precondition("page proxy is present") { !page.nil? }
52
+
53
+ step("compute and fill code") do
54
+ code = Browserctl::Flows::TOTP.generate(
55
+ secret,
56
+ digits: digits.to_i,
57
+ period: period.to_i
58
+ )
59
+ page.fill(selector, code)
60
+ end
61
+ end
@@ -2,12 +2,13 @@
2
2
 
3
3
  require "json"
4
4
  require "date"
5
+ require "time"
5
6
  require "fileutils"
6
7
  require "tmpdir"
7
8
  require "uri"
8
9
 
9
10
  module Browserctl
10
- class Recording
11
+ class Recording # rubocop:disable Metrics/ClassLength
11
12
  RECORDINGS_DIR = File.join(Dir.tmpdir, "browserctl-recordings")
12
13
  STATE_FILE = File.expand_path("~/.browserctl/active_recording")
13
14
 
@@ -15,6 +16,22 @@ module Browserctl
15
16
 
16
17
  SENSITIVE_PARAM_PATTERN = /\A(token|key|secret|auth|code|access_token|api_key|client_secret|state)\z/ix
17
18
 
19
+ # Selector tokens that signal a fill is targeting a secret-shaped field.
20
+ # The captured group (or matched substring) is used as the inferred field
21
+ # name; that name later drives the generated `secret_ref:` placeholder.
22
+ SECRET_FIELD_PATTERN = /\b(password|passwd|api[_-]?key|token|secret|otp|pin|client[_-]?secret|access[_-]?token)\b/i
23
+
24
+ # Conservative thresholds for inferring an explicit wait between recorded
25
+ # steps. Gaps shorter than the threshold come from natural input cadence;
26
+ # gaps above it usually mean the page actually had work to do.
27
+ WAIT_THRESHOLD_SECONDS = 1.5
28
+ WAIT_PADDING_SECONDS = 5
29
+ WAIT_FLOOR_SECONDS = 5
30
+
31
+ # Bumped when the recording log shape changes in a way that older
32
+ # tooling (workflow generate, replay) cannot read.
33
+ LOG_FORMAT = "v0.11"
34
+
18
35
  def self.start(name)
19
36
  FileUtils.mkdir_p(RECORDINGS_DIR, mode: 0o700)
20
37
  FileUtils.mkdir_p(File.dirname(STATE_FILE))
@@ -22,6 +39,14 @@ module Browserctl
22
39
  FileUtils.rm_f(log_path(name))
23
40
  FileUtils.touch(log_path(name))
24
41
  File.chmod(0o600, log_path(name))
42
+ File.open(log_path(name), "a") do |f|
43
+ f.puts JSON.generate(
44
+ cmd: "_meta",
45
+ log_format: LOG_FORMAT,
46
+ recording: name,
47
+ started_at: Time.now.utc.iso8601
48
+ )
49
+ end
25
50
  name
26
51
  end
27
52
 
@@ -37,40 +62,45 @@ module Browserctl
37
62
  File.exist?(STATE_FILE) ? File.read(STATE_FILE).strip : nil
38
63
  end
39
64
 
40
- def self.append(cmd, **attrs)
65
+ def self.append(cmd, response: nil, **attrs)
41
66
  name = active
42
67
  return unless name
43
68
  return unless RECORDABLE.include?(cmd.to_s)
44
69
 
45
70
  if %w[click fill].include?(cmd.to_s) && attrs[:selector].nil?
46
- record_ref_interaction(name, cmd.to_s, attrs)
71
+ record_ref_interaction(name, cmd.to_s, attrs, response)
47
72
  return
48
73
  end
49
74
 
50
75
  attrs = prepare_attrs(cmd.to_s, attrs)
76
+ entry = { cmd: cmd.to_s, ts: now }.merge(attrs.transform_keys(&:to_s))
77
+ entry.merge!(replay_metadata(response)) if response
51
78
 
52
79
  File.open(log_path(name), "a") do |f|
53
- f.puts JSON.generate({ cmd: cmd.to_s }.merge(attrs.transform_keys(&:to_s)))
80
+ f.puts JSON.generate(entry)
54
81
  end
55
82
  end
56
83
 
57
- def self.generate_workflow(name, output_path: nil)
84
+ def self.generate_workflow(name, output_path: nil, keep_log: false)
58
85
  log = log_path(name)
59
86
  raise "no recording found for '#{name}'" unless File.exist?(log)
60
87
 
61
- lines = File.readlines(log).map { |l| JSON.parse(l, symbolize_names: true) }
88
+ raw = File.readlines(log).map { |l| JSON.parse(l, symbolize_names: true) }
89
+ lines = raw.reject { |l| l[:cmd] == "_meta" }
62
90
  ruby = build_workflow_ruby(name, lines)
63
91
  File.write(output_path, ruby) if output_path
92
+ warn_about_ref_interactions(lines)
93
+ ruby
94
+ ensure
95
+ FileUtils.rm_f(log) if log && !keep_log
96
+ end
64
97
 
98
+ def self.warn_about_ref_interactions(lines)
65
99
  ref_count = lines.count { |l| l[:cmd] == "_ref_interaction" }
66
- if ref_count.positive?
67
- warn "Warning: #{ref_count} ref-based interaction(s) were captured but cannot be replayed by ref."
68
- warn "Search the generated workflow for 'TODO: ref-based' and replace with stable CSS selectors."
69
- end
100
+ return unless ref_count.positive?
70
101
 
71
- ruby
72
- ensure
73
- FileUtils.rm_f(log) if log
102
+ warn "Warning: #{ref_count} ref-based interaction(s) were captured but cannot be replayed by ref."
103
+ warn "Search the generated workflow for 'TODO: ref-based' and replace with stable CSS selectors."
74
104
  end
75
105
 
76
106
  class << self
@@ -80,26 +110,163 @@ module Browserctl
80
110
  File.join(RECORDINGS_DIR, "#{name}.jsonl")
81
111
  end
82
112
 
83
- def record_ref_interaction(recording_name, cmd, attrs)
84
- entry = { cmd: "_ref_interaction", action: cmd, ref: attrs[:ref], name: attrs[:name] }
113
+ def record_ref_interaction(recording_name, cmd, attrs, response)
114
+ entry = { cmd: "_ref_interaction", ts: now, action: cmd, ref: attrs[:ref], name: attrs[:name] }
115
+ entry.merge!(replay_metadata(response)) if response
85
116
  File.open(log_path(recording_name), "a") do |f|
86
117
  f.puts JSON.generate(entry)
87
118
  end
88
119
  end
89
120
 
121
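+ # `now` returns the current UTC time as float epoch seconds (the `ts` field).
122
+ # `replay_metadata` (below) pulls the replay-relevant fields out of a daemon
123
+ # response; each is optional — older daemons or non-resolving commands may omit any of them.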
124
+ def now
125
+ Time.now.utc.to_f
126
+ end
127
+
128
+ def replay_metadata(response)
129
+ meta = {}
130
+ meta[:ref] = response[:ref] if response[:ref]
131
+ meta[:fingerprint] = response[:fingerprint] if response[:fingerprint]
132
+ meta[:snapshot_id] = response[:snapshot_id] if response[:snapshot_id]
133
+ meta[:postcondition_hint] = response[:postcondition_hint] if response[:postcondition_hint]
134
+ meta[:post_snapshot_digest] = response[:post_snapshot_digest] if response[:post_snapshot_digest]
135
+ meta.transform_keys(&:to_s)
136
+ end
137
+
90
138
  def build_workflow_ruby(name, commands)
91
- steps = commands.map { |c| build_step(c) }.join("\n\n")
139
+ steps = annotated_steps(commands).join("\n\n")
140
+ secrets = commands.map { |c| c[:secret_field] }.compact.uniq
141
+ header = secret_header(secrets)
92
142
  <<~RUBY
93
143
  # frozen_string_literal: true
94
-
144
+ #{header}
95
145
  Browserctl.workflow #{name.inspect} do
96
146
  desc "Recorded on #{Date.today}"
97
-
147
+ #{secrets.map { |f| " param :secret_#{f}, secret: true" }.join("\n")}
98
148
  #{steps.gsub(/^/, ' ')}
99
149
  end
100
150
  RUBY
101
151
  end
102
152
 
153
+ # Walks the recorded events and emits the rendered step strings,
154
+ # interleaving inferred waits before selector-driven actions whose
155
+ # preceding gap exceeds WAIT_THRESHOLD_SECONDS, and inferred URL
156
+ # postconditions after click/fill steps that triggered navigation, plus snapshot-stability assertions when a post-step digest was recorded.
157
+ def annotated_steps(commands)
158
+ last_url = {}
159
+ commands.each_with_index.flat_map do |cmd, i|
160
+ rendered = []
161
+ if i.positive? && (wait = inferred_wait_step(commands[i - 1], cmd))
162
+ rendered << wait
163
+ end
164
+ rendered << build_step(cmd)
165
+ if (post = url_postcondition_step(cmd, last_url))
166
+ rendered << post
167
+ end
168
+ if (snap = snapshot_postcondition_step(cmd))
169
+ rendered << snap
170
+ end
171
+ update_last_url!(cmd, last_url)
172
+ rendered
173
+ end
174
+ end
175
+
176
+ # Emits a postcondition assertion when a click/fill resulted in a URL
177
+ # change. Compares the canonical (scheme+host+path) form so query
178
+ # strings and fragments don't make every replay flaky.
179
+ def url_postcondition_step(cmd, last_url)
180
+ return nil unless %w[click fill].include?(cmd[:cmd])
181
+ return nil unless cmd[:postcondition_hint] && cmd[:postcondition_hint][:url]
182
+
183
+ page = cmd[:name]
184
+ observed = cmd[:postcondition_hint][:url]
185
+ prior = last_url[page]
186
+ return nil if canonical_url(observed) == canonical_url(prior)
187
+
188
+ prefix = canonical_url(observed)
189
+ return nil unless prefix
190
+
191
+ <<~RUBY.chomp
192
+ step "assert url after #{cmd[:cmd]} on #{page}" do
193
+ current = page(:#{page}).url
194
+ assert current.start_with?(#{prefix.inspect}), "expected URL to start with #{prefix}, got \#{current}"
195
+ end
196
+ RUBY
197
+ end
198
+
199
+ # Emits an assert_snapshot_stable step when the recording captured a
200
+ # post-step DOM digest. Under workflow run --check the helper records
201
+ # drift on mismatch instead of raising, so a wiggly page surfaces in
202
+ # the report rather than failing the run outright.
203
+ def snapshot_postcondition_step(cmd)
204
+ return nil unless %w[click fill].include?(cmd[:cmd])
205
+ return nil unless cmd[:post_snapshot_digest]
206
+
207
+ page = cmd[:name]
208
+ digest = cmd[:post_snapshot_digest]
209
+ <<~RUBY.chomp
210
+ step "assert post-snapshot stable on #{page}" do
211
+ assert_snapshot_stable(:#{page}, expected_digest: #{digest.inspect})
212
+ end
213
+ RUBY
214
+ end
215
+
216
+ def update_last_url!(cmd, last_url)
217
+ case cmd[:cmd]
218
+ when "navigate", "page_open"
219
+ last_url[cmd[:name]] = cmd[:url] if cmd[:url]
220
+ when "click", "fill"
221
+ observed = cmd[:postcondition_hint] && cmd[:postcondition_hint][:url]
222
+ last_url[cmd[:name]] = observed if observed
223
+ end
224
+ end
225
+
226
+ def canonical_url(url)
227
+ return nil if url.nil? || url.empty?
228
+
229
+ uri = URI.parse(url)
230
+ path = uri.path.to_s
231
+ path = "/" if path.empty?
232
+ "#{uri.scheme}://#{uri.host}#{path}"
233
+ rescue URI::InvalidURIError
234
+ nil
235
+ end
236
+
237
+ def inferred_wait_step(prev, current)
238
+ return nil unless %w[fill click].include?(current[:cmd])
239
+ return nil unless current[:selector]
240
+
241
+ delta = elapsed(prev, current)
242
+ return nil unless delta && delta >= WAIT_THRESHOLD_SECONDS
243
+
244
+ timeout = [WAIT_FLOOR_SECONDS, delta.ceil + WAIT_PADDING_SECONDS].max
245
+ page = current[:name]
246
+ sel = current[:selector]
247
+ <<~RUBY.chomp
248
+ # inferred wait: prior step took ~#{format('%.1f', delta)}s
249
+ step "wait for #{sel} on #{page}" do
250
+ page(:#{page}).wait(#{sel.inspect}, timeout: #{timeout})
251
+ end
252
+ RUBY
253
+ end
254
+
255
+ def elapsed(prev, current)
256
+ return nil unless prev && current && prev[:ts] && current[:ts]
257
+
258
+ current[:ts] - prev[:ts]
259
+ end
260
+
261
+ def secret_header(secrets)
262
+ return "" if secrets.empty?
263
+
264
+ lines = ["# TODO: review the following secret-shaped fields detected during recording.",
265
+ "# Configure a secret_ref: source for each before running:"]
266
+ secrets.each { |f| lines << "# - secret_#{f}" }
267
+ "\n#{lines.join("\n")}\n"
268
+ end
269
+
103
270
  def build_step(cmd)
104
271
  label, body = step_parts(cmd)
105
272
 
@@ -113,12 +280,13 @@ module Browserctl
113
280
  "# end"
114
281
  end
115
282
 
116
- url = cmd[:url].to_s
117
- if url.include?("[REDACTED]")
118
- "# NOTE: sensitive query params were redacted during recording\nstep #{label.inspect} do\n #{body}\nend"
119
- else
120
- "step #{label.inspect} do\n #{body}\nend"
121
- end
283
+ prefix = []
284
+ prefix << "# NOTE: sensitive query params were redacted during recording" \
285
+ if cmd[:url].to_s.include?("[REDACTED]")
286
+ prefix << "# fingerprint fallback: #{cmd[:fingerprint].to_json}" if cmd[:fingerprint]
287
+
288
+ head = prefix.empty? ? "" : "#{prefix.join("\n")}\n"
289
+ "#{head}step #{label.inspect} do\n #{body}\nend"
122
290
  end
123
291
 
124
292
  def step_parts(cmd)
@@ -143,8 +311,9 @@ module Browserctl
143
311
  page = cmd[:name]
144
312
  case cmd[:cmd]
145
313
  when "fill"
314
+ value_arg = cmd[:secret_field] ? "params[:secret_#{cmd[:secret_field]}]" : "params[:fill_value]"
146
315
  ["fill #{cmd[:selector]} on #{page}",
147
- "page(:#{page}).fill(#{cmd[:selector].inspect}, params[:fill_value])"]
316
+ "page(:#{page}).fill(#{cmd[:selector].inspect}, #{value_arg})"]
148
317
  when "click"
149
318
  ["click #{cmd[:selector]} on #{page}",
150
319
  "page(:#{page}).click(#{cmd[:selector].inspect})"]
@@ -152,11 +321,28 @@ module Browserctl
152
321
  end
153
322
 
154
323
  def prepare_attrs(cmd, attrs)
155
- attrs = attrs.except(:value) if cmd == "fill"
324
+ attrs = attrs.except(:capture_post_snapshot)
325
+ if cmd == "fill"
326
+ attrs = attrs.except(:value)
327
+ field = infer_secret_field(attrs[:selector])
328
+ if field
329
+ attrs[:secret_hint] = true
330
+ attrs[:secret_field] = field
331
+ end
332
+ end
156
333
  attrs[:url] = redact_url(attrs[:url]) if %w[navigate page_open].include?(cmd) && attrs[:url]
157
334
  attrs
158
335
  end
159
336
 
337
+ def infer_secret_field(selector)
338
+ return nil unless selector
339
+
340
+ match = selector.match(SECRET_FIELD_PATTERN)
341
+ return nil unless match
342
+
343
+ match[1].downcase.gsub(/[^a-z0-9]/, "_")
344
+ end
345
+
160
346
  def redact_url(url)
161
347
  uri = URI.parse(url)
162
348
  return url if uri.query.nil?
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Browserctl
4
+ module Replay
5
+ # Per-page replay context carried by PageProxy during a workflow run
6
+ # generated from a recording.
7
+ #
8
+ # Holds the recorded fingerprint for each selector that the workflow
9
+ # interacts with. When a selector-driven command fails with
10
+ # selector_not_found at replay time, the proxy looks up the fingerprint
11
+ # here and asks FingerprintMatcher to find a candidate in the live
12
+ # snapshot. The matched element's stable ref is then re-used to retry
13
+ # the original command.
14
+ #
15
+ # Drift events (rematches, threshold misses) are accumulated on the
16
+ # context so the surrounding workflow runner can render them into a
17
+ # drift report at end-of-run.
18
+ class Context
19
+ DriftEvent = Struct.new(:command, :selector, :matched_ref, :score, :reason, keyword_init: true)
20
+
21
+ attr_reader :drift_events
22
+
23
+ def initialize(fingerprints: {})
24
+ @fingerprints = fingerprints
25
+ @drift_events = []
26
+ end
27
+
28
+ def fingerprint_for(selector)
29
+ @fingerprints[selector]
30
+ end
31
+
32
+ def record(command:, selector:, matched_ref: nil, score: nil, reason: nil)
33
+ @drift_events << DriftEvent.new(
34
+ command: command, selector: selector,
35
+ matched_ref: matched_ref, score: score, reason: reason
36
+ )
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Browserctl
4
+ module Replay
5
+ # Scores candidate snapshot entries against a recorded fingerprint and
6
+ # returns the best match above a configurable threshold.
7
+ #
8
+ # Inputs are the wire-shape fingerprints emitted by Snapshot::Fingerprint:
9
+ # { text:, role:, neighbors: [...], position: { index:, depth: } }
10
+ #
11
+ # Score is a weighted sum in [0.0, 1.0]:
12
+ # text 0.40 (exact match; case-insensitive)
13
+ # role 0.20 (exact match)
14
+ # neighbors 0.25 (Jaccard over the neighbor sets)
15
+ # position 0.15 (proximity in (index, depth) space)
16
+ #
17
+ # Defaults reflect the v0.11 acceptance bar: text + role together (0.60)
18
+ # are enough to clear the default threshold, so a renamed neighbor or a
19
+ # shifted index doesn't break replay.
20
+ class FingerprintMatcher
21
+ DEFAULT_THRESHOLD = 0.6
22
+ WEIGHTS = { text: 0.40, role: 0.20, neighbors: 0.25, position: 0.15 }.freeze
23
+
24
+ Match = Struct.new(:candidate, :score, keyword_init: true)
25
+
26
+ def initialize(threshold: DEFAULT_THRESHOLD, weights: WEIGHTS)
27
+ @threshold = threshold
28
+ @weights = weights
29
+ end
30
+
31
+ # Returns the highest-scoring candidate entry above the threshold, or
32
+ # nil if no candidate qualifies. `candidates` must be an array of
33
+ # snapshot entries (hashes with a :fingerprint key). The returned
34
+ # Match wraps the candidate hash and the numeric score.
35
+ def best(target_fp, candidates)
36
+ scored = candidates
37
+ .map { |c| Match.new(candidate: c, score: score(target_fp, c[:fingerprint])) }
38
+ .sort_by { |m| -m.score }
39
+
40
+ winner = scored.first
41
+ return nil unless winner && winner.score >= @threshold
42
+
43
+ winner
44
+ end
45
+
46
+ def score(target, candidate)
47
+ return 0.0 unless target && candidate
48
+
49
+ (@weights[:text] * text_score(target[:text], candidate[:text])) +
50
+ (@weights[:role] * bool_score(target[:role] == candidate[:role])) +
51
+ (@weights[:neighbors] * jaccard(target[:neighbors], candidate[:neighbors])) +
52
+ (@weights[:position] * position_score(target[:position], candidate[:position]))
53
+ end
54
+
55
+ private
56
+
57
+ def text_score(target, candidate)
58
+ return 0.0 if target.nil? || candidate.nil? || target.empty? || candidate.empty?
59
+
60
+ target.downcase.strip == candidate.downcase.strip ? 1.0 : 0.0
61
+ end
62
+
63
+ def bool_score(flag) = flag ? 1.0 : 0.0
64
+
65
+ def jaccard(target, candidate)
66
+ target = Array(target)
67
+ candidate = Array(candidate)
68
+ return 1.0 if target.empty? && candidate.empty?
69
+ return 0.0 if target.empty? || candidate.empty?
70
+
71
+ inter = (target & candidate).size
72
+ union = (target | candidate).size
73
+ union.zero? ? 0.0 : inter.to_f / union
74
+ end
75
+
76
+ def position_score(target, candidate)
77
+ return 0.0 unless target && candidate
78
+
79
+ idx_d = (target[:index].to_i - candidate[:index].to_i).abs
80
+ depth_d = (target[:depth].to_i - candidate[:depth].to_i).abs
81
+ # Soft falloff: 1.0 when identical, ~0 once they're 4+ apart in either axis.
82
+ [1.0 - ((idx_d + depth_d) / 8.0), 0.0].max
83
+ end
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+
5
+ module Browserctl
6
+ module Replay
7
+ # Stable digest + element-set comparison for post-step snapshots.
8
+ #
9
+ # The digest is intentionally cheap and stable across cosmetic DOM noise:
10
+ # only the (selector, role, tag) triples drive the hash, sorted to remove
11
+ # ordering effects. That's enough to flag structural drift (a step that
12
+ # used to land on /dashboard now lands on /login) without flapping on
13
+ # every reflow or class rename.
14
+ module SnapshotDiff
15
+ module_function
16
+
17
+ def digest(snapshot)
18
+ return nil if snapshot.nil?
19
+
20
+ keys = Array(snapshot).map { |el| identity_tuple(el) }.compact.sort
21
+ Digest::SHA1.hexdigest(keys.join("\n"))[0, 16]
22
+ end
23
+
24
+ # Returns { added: [...], removed: [...] } of element selectors that
25
+ # differ between two snapshots. Empty arrays mean the selector sets match; role/tag changes are not detected here.
26
+ def compare(prev, current)
27
+ prev_set = element_set(prev)
28
+ current_set = element_set(current)
29
+ {
30
+ added: (current_set - prev_set).sort,
31
+ removed: (prev_set - current_set).sort
32
+ }
33
+ end
34
+
35
+ def identity_tuple(entry)
36
+ return nil unless entry.is_a?(Hash)
37
+
38
+ sel = entry[:selector] || entry["selector"]
39
+ role = entry[:role] || entry["role"]
40
+ tag = entry[:tag] || entry["tag"]
41
+ return nil unless sel
42
+
43
+ "#{sel}|#{role}|#{tag}"
44
+ end
45
+
46
+ def element_set(snapshot)
47
+ Array(snapshot).map { |entry| entry[:selector] || entry["selector"] }.compact
48
+ end
49
+ end
50
+ end
51
+ end