browserctl 0.10.0 → 0.12.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -0
  3. data/README.md +2 -1
  4. data/bin/browserctl +168 -78
  5. data/bin/browserd +8 -1
  6. data/lib/browserctl/client.rb +50 -6
  7. data/lib/browserctl/commands/cli_output.rb +36 -3
  8. data/lib/browserctl/commands/flow.rb +123 -0
  9. data/lib/browserctl/commands/migrate.rb +94 -0
  10. data/lib/browserctl/commands/state.rb +193 -0
  11. data/lib/browserctl/commands/trace.rb +187 -0
  12. data/lib/browserctl/commands/workflow.rb +62 -4
  13. data/lib/browserctl/constants.rb +4 -2
  14. data/lib/browserctl/crash_report.rb +96 -0
  15. data/lib/browserctl/detectors/auth_required.rb +128 -0
  16. data/lib/browserctl/detectors.rb +2 -0
  17. data/lib/browserctl/error/codes.rb +44 -0
  18. data/lib/browserctl/error/exit_codes.rb +54 -0
  19. data/lib/browserctl/error/suggested_actions.rb +41 -0
  20. data/lib/browserctl/errors.rb +72 -12
  21. data/lib/browserctl/flow.rb +22 -1
  22. data/lib/browserctl/flow_registry.rb +66 -0
  23. data/lib/browserctl/flows/stdlib/basic_auth.rb +30 -0
  24. data/lib/browserctl/flows/stdlib/cloudflare_solve.rb +59 -0
  25. data/lib/browserctl/flows/stdlib/magic_link_email.rb +28 -0
  26. data/lib/browserctl/flows/stdlib/oauth_github.rb +28 -0
  27. data/lib/browserctl/flows/stdlib/oauth_google.rb +30 -0
  28. data/lib/browserctl/flows/stdlib/totp_2fa.rb +61 -0
  29. data/lib/browserctl/format_version.rb +37 -0
  30. data/lib/browserctl/logger.rb +102 -9
  31. data/lib/browserctl/migrations.rb +216 -0
  32. data/lib/browserctl/recording.rb +246 -28
  33. data/lib/browserctl/redactor.rb +58 -0
  34. data/lib/browserctl/replay/context.rb +40 -0
  35. data/lib/browserctl/replay/fingerprint_matcher.rb +86 -0
  36. data/lib/browserctl/replay/snapshot_diff.rb +51 -0
  37. data/lib/browserctl/replay/telemetry.rb +60 -0
  38. data/lib/browserctl/rubocop/cops/typed_error.rb +69 -0
  39. data/lib/browserctl/runner.rb +50 -10
  40. data/lib/browserctl/secret_resolver_registry.rb +23 -4
  41. data/lib/browserctl/server/command_dispatcher.rb +13 -1
  42. data/lib/browserctl/server/handlers/daemon_control.rb +5 -1
  43. data/lib/browserctl/server/handlers/error_payload.rb +27 -0
  44. data/lib/browserctl/server/handlers/interaction.rb +21 -3
  45. data/lib/browserctl/server/handlers/navigation.rb +50 -5
  46. data/lib/browserctl/server/handlers/observation.rb +43 -2
  47. data/lib/browserctl/server/handlers/state.rb +149 -0
  48. data/lib/browserctl/server/page_session.rb +9 -7
  49. data/lib/browserctl/server/snapshot_builder.rb +21 -45
  50. data/lib/browserctl/session.rb +1 -1
  51. data/lib/browserctl/snapshot/annotator.rb +75 -0
  52. data/lib/browserctl/snapshot/extractor.rb +21 -0
  53. data/lib/browserctl/snapshot/fingerprint.rb +88 -0
  54. data/lib/browserctl/snapshot/ref.rb +70 -0
  55. data/lib/browserctl/snapshot/serializer.rb +17 -0
  56. data/lib/browserctl/state/bundle.rb +283 -0
  57. data/lib/browserctl/state/transport.rb +64 -0
  58. data/lib/browserctl/state/transports/file.rb +35 -0
  59. data/lib/browserctl/state/transports/one_password.rb +67 -0
  60. data/lib/browserctl/state/transports/s3.rb +42 -0
  61. data/lib/browserctl/state.rb +208 -0
  62. data/lib/browserctl/version.rb +1 -1
  63. data/lib/browserctl/workflow/flow_wrapper.rb +81 -0
  64. data/lib/browserctl/workflow/promoter.rb +96 -0
  65. data/lib/browserctl/workflow/promotion_ledger.rb +72 -0
  66. data/lib/browserctl/workflow.rb +235 -16
  67. metadata +44 -7
@@ -8,7 +8,11 @@ module Browserctl
8
8
 
9
9
  def cmd_navigate(req)
10
10
  unless Policy.allowed_navigation?(req[:url].to_s)
11
- return { error: "navigation to '#{req[:url]}' blocked by domain policy", code: "domain_not_allowed" }
11
+ return error_payload(
12
+ code: Browserctl::Error::Codes::DOMAIN_NOT_ALLOWED,
13
+ message: "navigation to '#{req[:url]}' blocked by domain policy",
14
+ context: { url: req[:url].to_s }
15
+ )
12
16
  end
13
17
 
14
18
  with_page(req[:name]) do |session|
@@ -33,7 +37,8 @@ module Browserctl
33
37
  sel = resolve_selector_from(session, req)
34
38
  return sel if sel.is_a?(Hash)
35
39
 
36
- type_into(session.page, sel, req[:value])
40
+ result = type_into(session.page, sel, req[:value])
41
+ enrich_with_recording_metadata(result, session, sel, req)
37
42
  end
38
43
  end
39
44
 
@@ -42,17 +47,51 @@ module Browserctl
42
47
  sel = resolve_selector_from(session, req)
43
48
  return sel if sel.is_a?(Hash)
44
49
 
45
- click_element(session.page, sel)
50
+ result = click_element(session.page, sel)
51
+ enrich_with_recording_metadata(result, session, sel, req)
46
52
  end
47
53
  end
48
54
 
55
+ # Adds ref / fingerprint / snapshot_id / postcondition_hint to a successful
56
+ # click/fill response. Recording uses these to build a self-healing log.
57
+ # When req[:capture_post_snapshot] is true, also takes a fresh snapshot
58
+ # and attaches its digest so workflow run --check can diff DOM state
59
+ # against the recorded baseline.
60
+ def enrich_with_recording_metadata(result, session, selector, req)
61
+ return result unless result[:ok]
62
+
63
+ ref = req[:ref] || session.ref_registry.invert[selector]
64
+ fp = (ref && session.fingerprint_index[ref]) || session.fingerprint_index[selector]
65
+ enriched = result.merge(
66
+ ref: ref,
67
+ fingerprint: fp,
68
+ snapshot_id: session.snapshot_id,
69
+ postcondition_hint: { url: session.page.current_url }
70
+ )
71
+ enriched[:post_snapshot_digest] = capture_post_snapshot_digest(session) if req[:capture_post_snapshot]
72
+ enriched.compact
73
+ end
74
+
75
+ def capture_post_snapshot_digest(session)
76
+ snapshot = @snapshot_builder.call(session.page)
77
+ Browserctl::Replay::SnapshotDiff.digest(snapshot)
78
+ rescue StandardError
79
+ nil
80
+ end
81
+
49
82
  def cmd_url(req)
50
83
  with_page(req[:name]) { |session| { ok: true, url: session.page.current_url } }
51
84
  end
52
85
 
53
86
  def type_into(page, selector, value)
54
87
  el = page.at_css(selector)
55
- return { error: "selector not found: #{selector}" } unless el
88
+ unless el
89
+ return error_payload(
90
+ code: Browserctl::Error::Codes::SELECTOR_NOT_FOUND,
91
+ message: "selector not found: #{selector}",
92
+ context: { selector: selector }
93
+ )
94
+ end
56
95
 
57
96
  el.focus
58
97
  el.evaluate("this.select()")
@@ -62,7 +101,13 @@ module Browserctl
62
101
 
63
102
  def click_element(page, selector)
64
103
  el = page.at_css(selector)
65
- return { error: "selector not found: #{selector}" } unless el
104
+ unless el
105
+ return error_payload(
106
+ code: Browserctl::Error::Codes::SELECTOR_NOT_FOUND,
107
+ message: "selector not found: #{selector}",
108
+ context: { selector: selector }
109
+ )
110
+ end
66
111
 
67
112
  # Use the DOM native click() so JS-only event listeners fire.
68
113
  # CDP mouse simulation (el.click) dispatches events at screen coordinates
@@ -12,6 +12,30 @@ module Browserctl
12
12
  with_page(req[:name]) { |session| take_snapshot(session, req[:format], req[:diff]) }
13
13
  end
14
14
 
15
+ # Runs the auth_required detector against the page and returns either a
16
+ # plain `{ ok: true, auth_required: false }` response or a structured
17
+ # `{ error:, code: "AUTH_REQUIRED", state:, suggested_flow:, reason: }`
18
+ # error. Callers feed in cookies / suggested_flow when they have a
19
+ # bundle in hand (see PR 18); without them, only the URL signal fires.
20
+ def cmd_auth_check(req)
21
+ with_page(req[:name]) do |session|
22
+ cookies = session.page.cookies.all.values.map(&:to_h) if req[:include_cookies]
23
+ result = Browserctl::Detectors.auth_required(
24
+ session.page,
25
+ cookies: cookies,
26
+ suggested_flow: req[:suggested_flow]
27
+ )
28
+ next { ok: true, auth_required: false } unless result.triggered
29
+
30
+ Browserctl::AuthRequiredError.new(
31
+ result.reason,
32
+ state: req[:state],
33
+ suggested_flow: result.suggested_flow,
34
+ reason: result.reason
35
+ ).to_response
36
+ end
37
+ end
38
+
15
39
  def take_snapshot(session, format, diff)
16
40
  nonce = SecureRandom.hex(8)
17
41
  challenge = Detectors.cloudflare?(session.page)
@@ -20,15 +44,32 @@ module Browserctl
20
44
 
21
45
  snapshot = @snapshot_builder.call(session.page)
22
46
  registry = snapshot.to_h { |el| [el[:ref], el[:selector]] }
47
+ fp_index = build_fingerprint_index(snapshot)
23
48
 
24
49
  prev = session.prev_snapshot
25
- session.ref_registry = registry
26
- session.prev_snapshot = snapshot
50
+ session.ref_registry = registry
51
+ session.fingerprint_index = fp_index
52
+ session.snapshot_id = nonce
53
+ session.prev_snapshot = snapshot
27
54
  result = diff && prev ? compute_diff(prev, snapshot) : snapshot
28
55
 
29
56
  { ok: true, snapshot: result, challenge: challenge, nonce: nonce }
30
57
  end
31
58
 
59
+ def build_fingerprint_index(snapshot)
60
+ index = {}
61
+ snapshot.each do |el|
62
+ ref = el[:ref]
63
+ sel = el[:selector]
64
+ fp = el[:fingerprint]
65
+ next unless fp
66
+
67
+ index[ref] = fp if ref
68
+ index[sel] = fp if sel
69
+ end
70
+ index
71
+ end
72
+
32
73
  def compute_diff(prev, current)
33
74
  prev_by_sel = prev.to_h { |el| [el[:selector], el] }
34
75
  current.reject do |el|
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require_relative "../../state"
5
+
6
+ module Browserctl
7
+ class CommandDispatcher
8
+ module Handlers
9
+ # Top-level state management — collapses cookies + localStorage +
10
+ # sessionStorage into a single `.bctl` bundle. See lib/browserctl/state.rb.
11
+ module State
12
+ private
13
+
14
+ def cmd_state_save(req)
15
+ first_session = @global_mutex.synchronize { @pages.values.first }
16
+ return { error: "no open pages — open a page before saving state" } unless first_session
17
+
18
+ payload, captured_origins = capture_state_payload
19
+ manifest = Browserctl::State.save(
20
+ req[:name],
21
+ payload: payload,
22
+ origins: req[:origins] || captured_origins,
23
+ flow: req[:flow],
24
+ flow_version: req[:flow_version],
25
+ passphrase: req[:passphrase]
26
+ )
27
+
28
+ {
29
+ ok: true,
30
+ path: Browserctl::State.path(req[:name]),
31
+ origins: manifest[:origins],
32
+ cookies: payload[:cookies].length,
33
+ encrypted: manifest[:encrypted]
34
+ }
35
+ rescue Browserctl::Error, ArgumentError => e
36
+ { error: e.message }
37
+ end
38
+
39
+ def cmd_state_load(req)
40
+ data = Browserctl::State.load(req[:name], passphrase: req[:passphrase])
41
+ target = @global_mutex.synchronize { @pages.values.first }
42
+ return { error: "no open pages — open a page before loading state" } unless target
43
+
44
+ cookies = pluck(data[:payload], :cookies, default: [])
45
+
46
+ unless req[:skip_auth_check]
47
+ auth = Browserctl::Detectors.auth_required(
48
+ target.page, cookies: cookies, suggested_flow: data[:manifest][:flow]
49
+ )
50
+ if auth.triggered
51
+ return Browserctl::AuthRequiredError.new(
52
+ auth.reason,
53
+ state: req[:name],
54
+ suggested_flow: auth.suggested_flow,
55
+ reason: auth.reason
56
+ ).to_response
57
+ end
58
+ end
59
+
60
+ restore_state_cookies(target, cookies)
61
+ ls_count = restore_local_storage(pluck(data[:payload], :local_storage, default: {}))
62
+
63
+ {
64
+ ok: true,
65
+ cookies: cookies.length,
66
+ local_storage_keys: ls_count,
67
+ origins: data[:manifest][:origins]
68
+ }
69
+ rescue Browserctl::State::Bundle::BundleError, Browserctl::Error, ArgumentError, JSON::ParserError => e
70
+ { error: e.message }
71
+ end
72
+
73
+ def pluck(hash, sym, default:)
74
+ hash[sym] || hash[sym.to_s] || default
75
+ end
76
+
77
+ def restore_state_cookies(target, cookies)
78
+ cookies.each do |raw|
79
+ c = raw.transform_keys(&:to_sym)
80
+ target.page.cookies.set(**c.slice(:name, :value, :domain, :path))
81
+ end
82
+ end
83
+
84
+ def cmd_state_list(_req)
85
+ { ok: true, state: Browserctl::State.all }
86
+ end
87
+
88
+ def cmd_state_info(req)
89
+ { ok: true, info: Browserctl::State.info(req[:name]) }
90
+ rescue Browserctl::State::Bundle::BundleError, Browserctl::Error, ArgumentError => e
91
+ { error: e.message }
92
+ end
93
+
94
+ def cmd_state_delete(req)
95
+ Browserctl::State.delete(req[:name])
96
+ { ok: true }
97
+ rescue ArgumentError => e
98
+ { error: e.message }
99
+ end
100
+
101
+ def capture_state_payload
102
+ first = @global_mutex.synchronize { @pages.values.first }
103
+ cookies = first.page.cookies.all.values.map(&:to_h)
104
+
105
+ local_storage = {}
106
+ session_storage = {}
107
+ captured_origins = []
108
+
109
+ @global_mutex.synchronize { @pages.dup }.each_value do |session|
110
+ session.mutex.synchronize do
111
+ origin = session.page.evaluate("location.origin")
112
+ ls_str = session.page.evaluate("JSON.stringify({...localStorage})") || "{}"
113
+ ss_str = session.page.evaluate("JSON.stringify({...sessionStorage})") || "{}"
114
+ local_storage[origin] = JSON.parse(ls_str)
115
+ session_storage[origin] = JSON.parse(ss_str)
116
+ captured_origins << origin
117
+ end
118
+ end
119
+
120
+ payload = {
121
+ cookies: cookies,
122
+ local_storage: local_storage,
123
+ session_storage: session_storage
124
+ }
125
+ [payload, captured_origins.uniq]
126
+ end
127
+
128
+ def restore_local_storage(local_storage)
129
+ count = 0
130
+ local_storage.each do |origin, keys|
131
+ next if keys.nil? || keys.empty?
132
+
133
+ tmp_page = @driver.create_page
134
+ begin
135
+ tmp_page.go_to(origin.to_s)
136
+ keys.each do |k, v|
137
+ tmp_page.evaluate("localStorage.setItem(#{k.to_json}, #{v.to_json})")
138
+ count += 1
139
+ end
140
+ ensure
141
+ tmp_page.close
142
+ end
143
+ end
144
+ count
145
+ end
146
+ end
147
+ end
148
+ end
149
+ end
@@ -3,15 +3,17 @@
3
3
  module Browserctl
4
4
  class PageSession
5
5
  attr_reader :page, :mutex, :pause_cv
6
- attr_accessor :ref_registry, :prev_snapshot
6
+ attr_accessor :ref_registry, :prev_snapshot, :fingerprint_index, :snapshot_id
7
7
 
8
8
  def initialize(page)
9
- @page = page
10
- @mutex = Mutex.new
11
- @pause_cv = ConditionVariable.new
12
- @ref_registry = {}
13
- @prev_snapshot = nil
14
- @paused = false
9
+ @page = page
10
+ @mutex = Mutex.new
11
+ @pause_cv = ConditionVariable.new
12
+ @ref_registry = {}
13
+ @fingerprint_index = {}
14
+ @snapshot_id = nil
15
+ @prev_snapshot = nil
16
+ @paused = false
15
17
  end
16
18
 
17
19
  def paused? = @paused
@@ -1,55 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "nokogiri"
3
+ require "browserctl/snapshot/extractor"
4
+ require "browserctl/snapshot/annotator"
5
+ require "browserctl/snapshot/serializer"
4
6
 
5
7
  module Browserctl
8
+ # Orchestrates the snapshot pipeline:
9
+ #
10
+ # page.body ──Extractor──▶ [nodes]
11
+ # ──Annotator──▶ [entries with ref + fingerprint]
12
+ # ──Serializer─▶ wire-shape array
13
+ #
14
+ # Each stage is independently testable. Inject alternates via the keyword
15
+ # args for tests that want to isolate one stage.
6
16
  class SnapshotBuilder
7
- INTERACTABLE = %w[a button input select textarea
8
- [role=button] [role=link] [role=menuitem]].freeze
9
- ATTRS = %w[type name placeholder href aria-label role].freeze
10
-
11
- def call(page)
12
- doc = Nokogiri::HTML(page.body)
13
- ref = 0
14
- doc.css(INTERACTABLE.join(",")).map { |el| element_entry(el, ref += 1) }
15
- end
16
-
17
- private
18
-
19
- def element_entry(elem, ref)
20
- { ref: "e#{ref}", tag: elem.name, text: elem.text.strip.slice(0, 80),
21
- selector: css_path(elem), attrs: element_attrs(elem) }
22
- end
23
-
24
- def element_attrs(elem)
25
- elem.attributes.transform_values(&:value).slice(*ATTRS)
26
- end
27
-
28
- def css_path(node)
29
- ancestors_until_html(node).map { |n| path_segment(n) }.join(" > ")
17
+ def initialize(extractor: Snapshot::Extractor.new,
18
+ annotator: Snapshot::Annotator.new,
19
+ serializer: Snapshot::Serializer.new)
20
+ @extractor = extractor
21
+ @annotator = annotator
22
+ @serializer = serializer
30
23
  end
31
24
 
32
- def ancestors_until_html(node)
33
- [].tap do |acc|
34
- while node && node.name != "html"
35
- acc.unshift(node)
36
- node = node.parent
37
- end
38
- end
39
- end
40
-
41
- def path_segment(node)
42
- node.name + id_fragment(node) + class_fragment(node)
43
- end
44
-
45
- def id_fragment(node)
46
- (id = node["id"]) && !id.empty? ? "##{id}" : ""
47
- end
48
-
49
- def class_fragment(node)
50
- return "" if node["id"] && !node["id"].empty?
51
-
52
- (klass = node["class"]&.split&.first) ? ".#{klass}" : ""
25
+ def call(page)
26
+ nodes = @extractor.call(page.body)
27
+ entries = @annotator.call(nodes)
28
+ @serializer.call(entries)
53
29
  end
54
30
  end
55
31
  end
@@ -71,7 +71,7 @@ module Browserctl
71
71
  def self.load(session_name)
72
72
  validate_name!(session_name)
73
73
  dir = path(session_name)
74
- raise "session '#{session_name}' not found" unless Dir.exist?(dir)
74
+ raise Browserctl::Error, "session '#{session_name}' not found" unless Dir.exist?(dir)
75
75
 
76
76
  meta = JSON.parse(File.read(File.join(dir, "metadata.json")), symbolize_names: true)
77
77
 
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "browserctl/snapshot/ref"
4
+ require "browserctl/snapshot/fingerprint"
5
+
6
+ module Browserctl
7
+ module Snapshot
8
+ # Stage 2 of the snapshot pipeline.
9
+ #
10
+ # Takes the list of interactable nodes from Extractor and produces
11
+ # element entries with stable refs, semantic metadata, a CSS selector
12
+ # path, and a fingerprint. Each entry is a plain Hash.
13
+ class Annotator
14
+ ATTRS = %w[type name placeholder href aria-label role].freeze
15
+
16
+ def initialize(ref_deriver: RefDeriver.new, fingerprint: Fingerprint.new)
17
+ @ref_deriver = ref_deriver
18
+ @fingerprint = fingerprint
19
+ end
20
+
21
+ def call(nodes)
22
+ taken = {}
23
+ nodes.map do |node|
24
+ ref = @ref_deriver.disambiguate(@ref_deriver.derive(node), taken)
25
+ taken[ref] = true
26
+ entry(node, ref)
27
+ end
28
+ end
29
+
30
+ private
31
+
32
+ def entry(node, ref)
33
+ {
34
+ ref: ref,
35
+ tag: node.name,
36
+ text: node.text.strip.slice(0, 80),
37
+ selector: css_path(node),
38
+ attrs: attrs(node),
39
+ fingerprint: @fingerprint.build(node)
40
+ }
41
+ end
42
+
43
+ def attrs(node)
44
+ node.attributes.transform_values(&:value).slice(*ATTRS)
45
+ end
46
+
47
+ def css_path(node)
48
+ ancestors_until_html(node).map { |n| segment(n) }.join(" > ")
49
+ end
50
+
51
+ def ancestors_until_html(node)
52
+ [].tap do |acc|
53
+ while node && node.name != "html"
54
+ acc.unshift(node)
55
+ node = node.parent
56
+ end
57
+ end
58
+ end
59
+
60
+ def segment(node)
61
+ node.name + id_fragment(node) + class_fragment(node)
62
+ end
63
+
64
+ def id_fragment(node)
65
+ (id = node["id"]) && !id.empty? ? "##{id}" : ""
66
+ end
67
+
68
+ def class_fragment(node)
69
+ return "" if node["id"] && !node["id"].empty?
70
+
71
+ (klass = node["class"]&.split&.first) ? ".#{klass}" : ""
72
+ end
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+
5
+ module Browserctl
6
+ module Snapshot
7
+ # Stage 1 of the snapshot pipeline.
8
+ #
9
+ # Parses raw HTML and returns the set of interactable Nokogiri nodes
10
+ # that the rest of the pipeline will annotate. This stage knows nothing
11
+ # about refs, fingerprints, or wire format.
12
+ class Extractor
13
+ INTERACTABLE = %w[a button input select textarea
14
+ [role=button] [role=link] [role=menuitem]].freeze
15
+
16
+ def call(html)
17
+ Nokogiri::HTML(html).css(INTERACTABLE.join(",")).to_a
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "browserctl/snapshot/ref"
4
+
5
+ module Browserctl
6
+ module Snapshot
7
+ # Builds a per-element fingerprint that survives small DOM changes.
8
+ #
9
+ # The fingerprint is later used by the replay layer to rematch an
10
+ # element when its recorded selector no longer resolves: score the
11
+ # candidate elements in the new DOM against the recorded fingerprint
12
+ # and pick the best match above a threshold.
13
+ #
14
+ # Shape:
15
+ # {
16
+ # text: <accessible name>,
17
+ # role: <ARIA role, explicit or implicit>,
18
+ # neighbors: [<"tag:short text" of each nearby non-empty sibling>, ...],
19
+ # position: { index: <int>, depth: <int> }
20
+ # }
21
+ class Fingerprint
22
+ NEIGHBOR_RADIUS = 2 # siblings to capture on each side
23
+ NEIGHBOR_TEXT_LEN = 40
24
+
25
+ def initialize(ref_deriver: RefDeriver.new)
26
+ @ref_deriver = ref_deriver
27
+ end
28
+
29
+ def build(node)
30
+ {
31
+ text: accessible_name(node),
32
+ role: role(node),
33
+ neighbors: neighbors(node),
34
+ position: position(node)
35
+ }
36
+ end
37
+
38
+ private
39
+
40
+ def role(node)
41
+ explicit = node["role"]
42
+ return explicit if explicit && !explicit.empty?
43
+
44
+ RefDeriver::IMPLICIT_ROLE[node.name] || node.name
45
+ end
46
+
47
+ def accessible_name(node)
48
+ %w[aria-label placeholder alt title].each do |attr|
49
+ v = node[attr]
50
+ return v.strip if v && !v.strip.empty?
51
+ end
52
+ node.text.to_s.strip.slice(0, 80)
53
+ end
54
+
55
+ def neighbors(node)
56
+ parent = node.parent
57
+ return [] unless parent.respond_to?(:children)
58
+
59
+ idx = parent.children.to_a.index(node) || 0
60
+ window = parent.children.to_a[[idx - NEIGHBOR_RADIUS, 0].max...(idx + NEIGHBOR_RADIUS + 1)] || []
61
+ window
62
+ .reject { |c| c == node || !c.respond_to?(:name) }
63
+ .map { |c| neighbor_signal(c) }
64
+ .reject(&:empty?)
65
+ end
66
+
67
+ def neighbor_signal(node)
68
+ text = node.text.to_s.strip.gsub(/\s+/, " ").slice(0, NEIGHBOR_TEXT_LEN)
69
+ text.empty? ? "" : "#{node.name}:#{text}"
70
+ end
71
+
72
+ def position(node)
73
+ idx = node.parent.respond_to?(:children) ? (node.parent.children.to_a.index(node) || 0) : 0
74
+ { index: idx, depth: depth(node) }
75
+ end
76
+
77
+ def depth(node)
78
+ d = 0
79
+ cur = node.parent
80
+ while cur.respond_to?(:name) && cur.name != "document"
81
+ d += 1
82
+ cur = cur.parent
83
+ end
84
+ d
85
+ end
86
+ end
87
+ end
88
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+
5
+ module Browserctl
6
+ module Snapshot
7
+ # Derives a stable element ref from semantic + structural signals.
8
+ #
9
+ # The same DOM element should produce the same ref across two snapshots
10
+ # of the same page. Inputs to the hash are:
11
+ # - role (explicit @role, else implicit ARIA role from tag)
12
+ # - accessible name (aria-label || placeholder || alt || title || text)
13
+ # - tag
14
+ # - parent path (chain of ancestor tag names up to, but not including, <html>)
15
+ #
16
+ # Collisions within a single snapshot are disambiguated by the caller via
17
+ # `disambiguate(ref, taken)` — the deriver itself is pure.
18
+ class RefDeriver
19
+ IMPLICIT_ROLE = {
20
+ "a" => "link", "button" => "button", "input" => "textbox",
21
+ "select" => "combobox", "textarea" => "textbox"
22
+ }.freeze
23
+
24
+ HASH_LEN = 7
25
+
26
+ def derive(node)
27
+ signal = [role(node), accessible_name(node), node.name, parent_path(node)].join("|")
28
+ "e#{Digest::SHA256.hexdigest(signal)[0, HASH_LEN]}"
29
+ end
30
+
31
+ # Given a candidate ref and a set of already-taken refs in the current
32
+ # snapshot, return a unique ref. Adds `-2`, `-3`, ... as needed.
33
+ def disambiguate(ref, taken)
34
+ return ref unless taken.include?(ref)
35
+
36
+ n = 2
37
+ n += 1 while taken.include?("#{ref}-#{n}")
38
+ "#{ref}-#{n}"
39
+ end
40
+
41
+ private
42
+
43
+ def role(node)
44
+ explicit = node["role"]
45
+ return explicit if explicit && !explicit.empty?
46
+
47
+ IMPLICIT_ROLE[node.name] || node.name
48
+ end
49
+
50
+ def accessible_name(node)
51
+ %w[aria-label placeholder alt title].each do |attr|
52
+ v = node[attr]
53
+ return v.strip if v && !v.strip.empty?
54
+ end
55
+ text = node.text.to_s.strip
56
+ text.empty? ? "" : text.slice(0, 80)
57
+ end
58
+
59
+ def parent_path(node)
60
+ parts = []
61
+ cur = node.parent
62
+ while cur.respond_to?(:name) && cur.name != "html" && cur.name != "document"
63
+ parts.unshift(cur.name)
64
+ cur = cur.parent
65
+ end
66
+ parts.join(">")
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Browserctl
4
+ module Snapshot
5
+ # Stage 3 of the snapshot pipeline.
6
+ #
7
+ # Right now this is the identity function — annotated entries are
8
+ # already in the wire shape clients expect. It exists as a seam so
9
+ # later milestones can canonicalize, redact, or compress without
10
+ # touching extraction or annotation.
11
+ class Serializer
12
+ def call(entries)
13
+ entries
14
+ end
15
+ end
16
+ end
17
+ end