browserctl 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/README.md +1 -1
- data/bin/browserctl +45 -4
- data/lib/browserctl/client.rb +47 -3
- data/lib/browserctl/commands/cli_output.rb +16 -3
- data/lib/browserctl/commands/flow.rb +123 -0
- data/lib/browserctl/commands/state.rb +193 -0
- data/lib/browserctl/commands/workflow.rb +62 -4
- data/lib/browserctl/constants.rb +1 -1
- data/lib/browserctl/detectors/auth_required.rb +128 -0
- data/lib/browserctl/detectors.rb +2 -0
- data/lib/browserctl/errors.rb +30 -0
- data/lib/browserctl/flow.rb +22 -1
- data/lib/browserctl/flow_registry.rb +66 -0
- data/lib/browserctl/flows/stdlib/basic_auth.rb +30 -0
- data/lib/browserctl/flows/stdlib/cloudflare_solve.rb +59 -0
- data/lib/browserctl/flows/stdlib/magic_link_email.rb +28 -0
- data/lib/browserctl/flows/stdlib/oauth_github.rb +28 -0
- data/lib/browserctl/flows/stdlib/oauth_google.rb +30 -0
- data/lib/browserctl/flows/stdlib/totp_2fa.rb +61 -0
- data/lib/browserctl/recording.rb +212 -26
- data/lib/browserctl/replay/context.rb +40 -0
- data/lib/browserctl/replay/fingerprint_matcher.rb +86 -0
- data/lib/browserctl/replay/snapshot_diff.rb +51 -0
- data/lib/browserctl/replay/telemetry.rb +60 -0
- data/lib/browserctl/runner.rb +38 -4
- data/lib/browserctl/server/command_dispatcher.rb +10 -1
- data/lib/browserctl/server/handlers/interaction.rb +3 -3
- data/lib/browserctl/server/handlers/navigation.rb +33 -4
- data/lib/browserctl/server/handlers/observation.rb +43 -2
- data/lib/browserctl/server/handlers/state.rb +149 -0
- data/lib/browserctl/server/page_session.rb +9 -7
- data/lib/browserctl/server/snapshot_builder.rb +21 -45
- data/lib/browserctl/snapshot/annotator.rb +75 -0
- data/lib/browserctl/snapshot/extractor.rb +21 -0
- data/lib/browserctl/snapshot/fingerprint.rb +88 -0
- data/lib/browserctl/snapshot/ref.rb +70 -0
- data/lib/browserctl/snapshot/serializer.rb +17 -0
- data/lib/browserctl/state/bundle.rb +242 -0
- data/lib/browserctl/state/transport.rb +64 -0
- data/lib/browserctl/state/transports/file.rb +35 -0
- data/lib/browserctl/state/transports/one_password.rb +67 -0
- data/lib/browserctl/state/transports/s3.rb +42 -0
- data/lib/browserctl/state.rb +208 -0
- data/lib/browserctl/version.rb +1 -1
- data/lib/browserctl/workflow/flow_wrapper.rb +81 -0
- data/lib/browserctl/workflow/promoter.rb +96 -0
- data/lib/browserctl/workflow/promotion_ledger.rb +72 -0
- data/lib/browserctl/workflow.rb +180 -16
- metadata +31 -2
|
@@ -27,7 +27,7 @@ module Browserctl
|
|
|
27
27
|
"return { x: r.left + r.width / 2, y: r.top + r.height / 2 }; " \
|
|
28
28
|
"})(#{sel.to_json})"
|
|
29
29
|
)
|
|
30
|
-
return { error: "selector not found: #{sel}" } unless coords
|
|
30
|
+
return { error: "selector not found: #{sel}", code: "selector_not_found" } unless coords
|
|
31
31
|
|
|
32
32
|
session.page.mouse.move(x: coords["x"], y: coords["y"])
|
|
33
33
|
{ ok: true }
|
|
@@ -43,7 +43,7 @@ module Browserctl
|
|
|
43
43
|
return sel if sel.is_a?(Hash)
|
|
44
44
|
|
|
45
45
|
el = session.page.at_css(sel)
|
|
46
|
-
return { error: "selector not found: #{sel}" } unless el
|
|
46
|
+
return { error: "selector not found: #{sel}", code: "selector_not_found" } unless el
|
|
47
47
|
|
|
48
48
|
el.select_file(path)
|
|
49
49
|
{ ok: true }
|
|
@@ -56,7 +56,7 @@ module Browserctl
|
|
|
56
56
|
return sel if sel.is_a?(Hash)
|
|
57
57
|
|
|
58
58
|
el = session.page.at_css(sel)
|
|
59
|
-
return { error: "selector not found: #{sel}" } unless el
|
|
59
|
+
return { error: "selector not found: #{sel}", code: "selector_not_found" } unless el
|
|
60
60
|
|
|
61
61
|
el.evaluate(
|
|
62
62
|
"this.value = #{req[:value].to_json}; " \
|
|
@@ -33,7 +33,8 @@ module Browserctl
|
|
|
33
33
|
sel = resolve_selector_from(session, req)
|
|
34
34
|
return sel if sel.is_a?(Hash)
|
|
35
35
|
|
|
36
|
-
type_into(session.page, sel, req[:value])
|
|
36
|
+
result = type_into(session.page, sel, req[:value])
|
|
37
|
+
enrich_with_recording_metadata(result, session, sel, req)
|
|
37
38
|
end
|
|
38
39
|
end
|
|
39
40
|
|
|
@@ -42,17 +43,45 @@ module Browserctl
|
|
|
42
43
|
sel = resolve_selector_from(session, req)
|
|
43
44
|
return sel if sel.is_a?(Hash)
|
|
44
45
|
|
|
45
|
-
click_element(session.page, sel)
|
|
46
|
+
result = click_element(session.page, sel)
|
|
47
|
+
enrich_with_recording_metadata(result, session, sel, req)
|
|
46
48
|
end
|
|
47
49
|
end
|
|
48
50
|
|
|
51
|
+
# Decorates a successful click/fill response with the data recording needs:
# the element ref, its fingerprint, the id of the snapshot currently stored on
# the session, and a postcondition hint holding the page URL after the action.
#
# Failed results (no truthy :ok) are returned untouched. When
# req[:capture_post_snapshot] is set, a digest of a freshly taken snapshot is
# attached as :post_snapshot_digest. Keys whose value is nil are dropped from
# the returned Hash.
def enrich_with_recording_metadata(result, session, selector, req)
  return result unless result[:ok]

  # Prefer the ref supplied by the caller; otherwise reverse-look-up the
  # selector in the session's ref => selector registry.
  element_ref = req[:ref] || session.ref_registry.invert[selector]
  fingerprints = session.fingerprint_index
  fingerprint = (element_ref && fingerprints[element_ref]) || fingerprints[selector]

  metadata = {
    ref: element_ref,
    fingerprint: fingerprint,
    snapshot_id: session.snapshot_id,
    postcondition_hint: { url: session.page.current_url }
  }
  metadata[:post_snapshot_digest] = capture_post_snapshot_digest(session) if req[:capture_post_snapshot]

  result.merge(metadata).compact
end
|
|
70
|
+
|
|
71
|
+
# Takes a fresh snapshot of the session's page and returns its digest as
# computed by Replay::SnapshotDiff. Best-effort: any StandardError raised
# while snapshotting or digesting yields nil instead of propagating.
def capture_post_snapshot_digest(session)
  Browserctl::Replay::SnapshotDiff.digest(@snapshot_builder.call(session.page))
rescue StandardError
  nil
end
|
|
77
|
+
|
|
49
78
|
# Reports the current URL of the page named by req[:name].
def cmd_url(req)
  with_page(req[:name]) do |session|
    current = session.page.current_url
    { ok: true, url: current }
  end
end
|
|
52
81
|
|
|
53
82
|
def type_into(page, selector, value)
|
|
54
83
|
el = page.at_css(selector)
|
|
55
|
-
return { error: "selector not found: #{selector}" } unless el
|
|
84
|
+
return { error: "selector not found: #{selector}", code: "selector_not_found" } unless el
|
|
56
85
|
|
|
57
86
|
el.focus
|
|
58
87
|
el.evaluate("this.select()")
|
|
@@ -62,7 +91,7 @@ module Browserctl
|
|
|
62
91
|
|
|
63
92
|
def click_element(page, selector)
|
|
64
93
|
el = page.at_css(selector)
|
|
65
|
-
return { error: "selector not found: #{selector}" } unless el
|
|
94
|
+
return { error: "selector not found: #{selector}", code: "selector_not_found" } unless el
|
|
66
95
|
|
|
67
96
|
# Use the DOM native click() so JS-only event listeners fire.
|
|
68
97
|
# CDP mouse simulation (el.click) dispatches events at screen coordinates
|
|
@@ -12,6 +12,30 @@ module Browserctl
|
|
|
12
12
|
with_page(req[:name]) { |session| take_snapshot(session, req[:format], req[:diff]) }
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
+
# Runs the auth_required detector against the named page.
#
# Returns `{ ok: true, auth_required: false }` when the detector does not
# trigger. Otherwise returns the response built by
# AuthRequiredError#to_response from the detector's reason and suggested flow
# plus the caller-supplied req[:state].
#
# The page's cookies are only passed to the detector when
# req[:include_cookies] is set; req[:suggested_flow] is forwarded as given.
def cmd_auth_check(req)
  with_page(req[:name]) do |session|
    jar = req[:include_cookies] ? session.page.cookies.all.values.map(&:to_h) : nil
    detection = Browserctl::Detectors.auth_required(
      session.page,
      cookies: jar,
      suggested_flow: req[:suggested_flow]
    )

    if detection.triggered
      Browserctl::AuthRequiredError.new(
        detection.reason,
        state: req[:state],
        suggested_flow: detection.suggested_flow,
        reason: detection.reason
      ).to_response
    else
      { ok: true, auth_required: false }
    end
  end
end
|
|
38
|
+
|
|
15
39
|
def take_snapshot(session, format, diff)
|
|
16
40
|
nonce = SecureRandom.hex(8)
|
|
17
41
|
challenge = Detectors.cloudflare?(session.page)
|
|
@@ -20,15 +44,32 @@ module Browserctl
|
|
|
20
44
|
|
|
21
45
|
snapshot = @snapshot_builder.call(session.page)
|
|
22
46
|
registry = snapshot.to_h { |el| [el[:ref], el[:selector]] }
|
|
47
|
+
fp_index = build_fingerprint_index(snapshot)
|
|
23
48
|
|
|
24
49
|
prev = session.prev_snapshot
|
|
25
|
-
session.ref_registry
|
|
26
|
-
session.
|
|
50
|
+
session.ref_registry = registry
|
|
51
|
+
session.fingerprint_index = fp_index
|
|
52
|
+
session.snapshot_id = nonce
|
|
53
|
+
session.prev_snapshot = snapshot
|
|
27
54
|
result = diff && prev ? compute_diff(prev, snapshot) : snapshot
|
|
28
55
|
|
|
29
56
|
{ ok: true, snapshot: result, challenge: challenge, nonce: nonce }
|
|
30
57
|
end
|
|
31
58
|
|
|
59
|
+
# Builds a lookup table from a snapshot (an enumerable of element Hashes).
# Each element that carries a :fingerprint is indexed under both its :ref and
# its :selector (whichever are present); elements without a fingerprint are
# ignored. Later elements overwrite earlier ones on key collisions.
def build_fingerprint_index(snapshot)
  snapshot.each_with_object({}) do |element, by_key|
    fingerprint = element[:fingerprint]
    next unless fingerprint

    element.values_at(:ref, :selector).each do |key|
      by_key[key] = fingerprint if key
    end
  end
end
|
|
72
|
+
|
|
32
73
|
def compute_diff(prev, current)
|
|
33
74
|
prev_by_sel = prev.to_h { |el| [el[:selector], el] }
|
|
34
75
|
current.reject do |el|
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "../../state"
|
|
5
|
+
|
|
6
|
+
module Browserctl
  class CommandDispatcher
    module Handlers
      # Top-level state management — collapses cookies + localStorage +
      # sessionStorage into a single `.bctl` bundle. See lib/browserctl/state.rb.
      #
      # Every handler answers with a plain Hash: `{ ok: true, ... }` on
      # success, or `{ error: "<message>" }` when one of the rescued error
      # classes is raised.
      module State
        private

        # Captures cookies and web storage from the open pages and persists
        # them under req[:name] via Browserctl::State.save.
        #
        # Optional request keys: :origins (overrides the captured origins),
        # :flow, :flow_version, :passphrase — all forwarded to State.save.
        def cmd_state_save(req)
          first_session = @global_mutex.synchronize { @pages.values.first }
          return { error: "no open pages — open a page before saving state" } unless first_session

          # Pass along the session we just validated so the capture step does
          # not look it up a second time (the page list may have changed).
          payload, captured_origins = capture_state_payload(first_session)
          manifest = Browserctl::State.save(
            req[:name],
            payload: payload,
            origins: req[:origins] || captured_origins,
            flow: req[:flow],
            flow_version: req[:flow_version],
            passphrase: req[:passphrase]
          )

          {
            ok: true,
            path: Browserctl::State.path(req[:name]),
            origins: manifest[:origins],
            cookies: payload[:cookies].length,
            encrypted: manifest[:encrypted]
          }
        rescue Browserctl::Error, ArgumentError, JSON::ParserError => e
          # JSON::ParserError can come from parsing the storage dumps during
          # capture; report it like cmd_state_load does.
          { error: e.message }
        end

        # Loads the bundle named req[:name] and restores its cookies and
        # localStorage into the browser.
        #
        # Unless req[:skip_auth_check] is set, the auth_required detector is
        # run first against the first open page with the bundle's cookies; if
        # it triggers, the AuthRequiredError response is returned and nothing
        # is restored.
        def cmd_state_load(req)
          data = Browserctl::State.load(req[:name], passphrase: req[:passphrase])
          target = @global_mutex.synchronize { @pages.values.first }
          return { error: "no open pages — open a page before loading state" } unless target

          cookies = pluck(data[:payload], :cookies, default: [])

          unless req[:skip_auth_check]
            auth = Browserctl::Detectors.auth_required(
              target.page, cookies: cookies, suggested_flow: data[:manifest][:flow]
            )
            if auth.triggered
              return Browserctl::AuthRequiredError.new(
                auth.reason,
                state: req[:name],
                suggested_flow: auth.suggested_flow,
                reason: auth.reason
              ).to_response
            end
          end

          restore_state_cookies(target, cookies)
          ls_count = restore_local_storage(pluck(data[:payload], :local_storage, default: {}))

          {
            ok: true,
            cookies: cookies.length,
            local_storage_keys: ls_count,
            origins: data[:manifest][:origins]
          }
        rescue Browserctl::State::Bundle::BundleError, Browserctl::Error, ArgumentError, JSON::ParserError => e
          { error: e.message }
        end

        # Reads `sym` from a Hash that may be keyed by Symbol or String,
        # falling back to `default` when neither key holds a truthy value.
        def pluck(hash, sym, default:)
          hash[sym] || hash[sym.to_s] || default
        end

        # Sets each stored cookie on the target page's cookie jar.
        #
        # NOTE(review): only :name, :value, :domain and :path are forwarded;
        # any other attributes present in the stored cookie Hash are dropped.
        # Confirm that is intended.
        def restore_state_cookies(target, cookies)
          cookies.each do |raw|
            c = raw.transform_keys(&:to_sym)
            target.page.cookies.set(**c.slice(:name, :value, :domain, :path))
          end
        end

        # Lists whatever Browserctl::State.all returns.
        def cmd_state_list(_req)
          { ok: true, state: Browserctl::State.all }
        end

        # Returns Browserctl::State.info for the bundle named req[:name].
        def cmd_state_info(req)
          { ok: true, info: Browserctl::State.info(req[:name]) }
        rescue Browserctl::State::Bundle::BundleError, Browserctl::Error, ArgumentError => e
          { error: e.message }
        end

        # Deletes the bundle named req[:name]. Rescues Browserctl::Error as
        # well as ArgumentError, matching the other state handlers.
        def cmd_state_delete(req)
          Browserctl::State.delete(req[:name])
          { ok: true }
        rescue Browserctl::Error, ArgumentError => e
          { error: e.message }
        end

        # Collects the payload to persist.
        #
        # Cookies are read from `first` (defaults to the first open page);
        # localStorage and sessionStorage are read from every open page and
        # keyed by that page's `location.origin`. When several pages share an
        # origin, the page visited last wins.
        #
        # Returns `[payload, origins]` where payload has :cookies,
        # :local_storage and :session_storage, and origins is de-duplicated.
        def capture_state_payload(first = nil)
          first ||= @global_mutex.synchronize { @pages.values.first }
          cookies = first.page.cookies.all.values.map(&:to_h)

          local_storage = {}
          session_storage = {}
          captured_origins = []

          # Iterate over a copy of the page table so the global lock is not
          # held while each page is being evaluated under its own mutex.
          @global_mutex.synchronize { @pages.dup }.each_value do |session|
            session.mutex.synchronize do
              origin = session.page.evaluate("location.origin")
              ls_str = session.page.evaluate("JSON.stringify({...localStorage})") || "{}"
              ss_str = session.page.evaluate("JSON.stringify({...sessionStorage})") || "{}"
              local_storage[origin] = JSON.parse(ls_str)
              session_storage[origin] = JSON.parse(ss_str)
              captured_origins << origin
            end
          end

          payload = {
            cookies: cookies,
            local_storage: local_storage,
            session_storage: session_storage
          }
          [payload, captured_origins.uniq]
        end

        # Writes the stored localStorage entries back, one origin at a time,
        # by opening a temporary page on that origin, calling
        # localStorage.setItem for every key, and closing the page again.
        # Origins with no keys are skipped. Returns the number of keys written.
        def restore_local_storage(local_storage)
          count = 0
          local_storage.each do |origin, keys|
            next if keys.nil? || keys.empty?

            tmp_page = @driver.create_page
            begin
              tmp_page.go_to(origin.to_s)
              keys.each do |k, v|
                tmp_page.evaluate("localStorage.setItem(#{k.to_json}, #{v.to_json})")
                count += 1
              end
            ensure
              tmp_page.close
            end
          end
          count
        end
      end
    end
  end
end
|
|
@@ -3,15 +3,17 @@
|
|
|
3
3
|
module Browserctl
|
|
4
4
|
class PageSession
|
|
5
5
|
attr_reader :page, :mutex, :pause_cv
|
|
6
|
-
attr_accessor :ref_registry, :prev_snapshot
|
|
6
|
+
attr_accessor :ref_registry, :prev_snapshot, :fingerprint_index, :snapshot_id
|
|
7
7
|
|
|
8
8
|
def initialize(page)
|
|
9
|
-
@page
|
|
10
|
-
@mutex
|
|
11
|
-
@pause_cv
|
|
12
|
-
@ref_registry
|
|
13
|
-
@
|
|
14
|
-
@
|
|
9
|
+
@page = page
|
|
10
|
+
@mutex = Mutex.new
|
|
11
|
+
@pause_cv = ConditionVariable.new
|
|
12
|
+
@ref_registry = {}
|
|
13
|
+
@fingerprint_index = {}
|
|
14
|
+
@snapshot_id = nil
|
|
15
|
+
@prev_snapshot = nil
|
|
16
|
+
@paused = false
|
|
15
17
|
end
|
|
16
18
|
|
|
17
19
|
def paused? = @paused
|
|
@@ -1,55 +1,31 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "
|
|
3
|
+
require "browserctl/snapshot/extractor"
|
|
4
|
+
require "browserctl/snapshot/annotator"
|
|
5
|
+
require "browserctl/snapshot/serializer"
|
|
4
6
|
|
|
5
7
|
module Browserctl
|
|
8
|
+
# Orchestrates the snapshot pipeline:
|
|
9
|
+
#
|
|
10
|
+
# page.body ──Extractor──▶ [nodes]
|
|
11
|
+
# ──Annotator──▶ [entries with ref + fingerprint]
|
|
12
|
+
# ──Serializer─▶ wire-shape array
|
|
13
|
+
#
|
|
14
|
+
# Each stage is independently testable. Inject alternates via the keyword
|
|
15
|
+
# args for tests that want to isolate one stage.
|
|
6
16
|
class SnapshotBuilder
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
ref = 0
|
|
14
|
-
doc.css(INTERACTABLE.join(",")).map { |el| element_entry(el, ref += 1) }
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
private
|
|
18
|
-
|
|
19
|
-
def element_entry(elem, ref)
|
|
20
|
-
{ ref: "e#{ref}", tag: elem.name, text: elem.text.strip.slice(0, 80),
|
|
21
|
-
selector: css_path(elem), attrs: element_attrs(elem) }
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
def element_attrs(elem)
|
|
25
|
-
elem.attributes.transform_values(&:value).slice(*ATTRS)
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
def css_path(node)
|
|
29
|
-
ancestors_until_html(node).map { |n| path_segment(n) }.join(" > ")
|
|
17
|
+
def initialize(extractor: Snapshot::Extractor.new,
|
|
18
|
+
annotator: Snapshot::Annotator.new,
|
|
19
|
+
serializer: Snapshot::Serializer.new)
|
|
20
|
+
@extractor = extractor
|
|
21
|
+
@annotator = annotator
|
|
22
|
+
@serializer = serializer
|
|
30
23
|
end
|
|
31
24
|
|
|
32
|
-
def
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
node = node.parent
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
def path_segment(node)
|
|
42
|
-
node.name + id_fragment(node) + class_fragment(node)
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
def id_fragment(node)
|
|
46
|
-
(id = node["id"]) && !id.empty? ? "##{id}" : ""
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def class_fragment(node)
|
|
50
|
-
return "" if node["id"] && !node["id"].empty?
|
|
51
|
-
|
|
52
|
-
(klass = node["class"]&.split&.first) ? ".#{klass}" : ""
|
|
25
|
+
def call(page)
|
|
26
|
+
nodes = @extractor.call(page.body)
|
|
27
|
+
entries = @annotator.call(nodes)
|
|
28
|
+
@serializer.call(entries)
|
|
53
29
|
end
|
|
54
30
|
end
|
|
55
31
|
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "browserctl/snapshot/ref"
|
|
4
|
+
require "browserctl/snapshot/fingerprint"
|
|
5
|
+
|
|
6
|
+
module Browserctl
  module Snapshot
    # Second stage of the snapshot pipeline.
    #
    # Turns each node it is given into a plain Hash entry holding a ref that is
    # unique within the snapshot, the tag name, trimmed text (max 80 chars), a
    # CSS selector path, a whitelist of attributes, and a fingerprint.
    class Annotator
      ATTRS = %w[type name placeholder href aria-label role].freeze

      def initialize(ref_deriver: RefDeriver.new, fingerprint: Fingerprint.new)
        @ref_deriver = ref_deriver
        @fingerprint = fingerprint
      end

      # Maps nodes to entries, in order. Refs already handed out during this
      # call are tracked so the deriver can disambiguate collisions.
      def call(nodes)
        used_refs = {}
        nodes.map do |node|
          candidate = @ref_deriver.derive(node)
          unique_ref = @ref_deriver.disambiguate(candidate, used_refs)
          used_refs[unique_ref] = true
          entry(node, unique_ref)
        end
      end

      private

      def entry(node, ref)
        trimmed_text = node.text.strip.slice(0, 80)
        {
          ref: ref,
          tag: node.name,
          text: trimmed_text,
          selector: css_path(node),
          attrs: attrs(node),
          fingerprint: @fingerprint.build(node)
        }
      end

      # Attribute name => value, limited to the names listed in ATTRS.
      def attrs(node)
        node.attributes
            .select { |attr_name, _| ATTRS.include?(attr_name) }
            .transform_values(&:value)
      end

      # Joins one segment per ancestor (outermost first, the node itself last)
      # with " > ". The walk stops at the <html> element or when there is no
      # parent left.
      def css_path(node)
        chain = []
        current = node
        while current && current.name != "html"
          chain << segment(current)
          current = current.parent
        end
        chain.reverse.join(" > ")
      end

      # tag#id when the node has a non-empty id, else tag.first-class when it
      # has a class, else just the tag.
      def segment(node)
        id = node["id"]
        return "#{node.name}##{id}" if id && !id.empty?

        first_class = node["class"]&.split&.first
        first_class ? "#{node.name}.#{first_class}" : node.name
      end
    end
  end
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
|
|
5
|
+
module Browserctl
  module Snapshot
    # First stage of the snapshot pipeline.
    #
    # Parses an HTML string with Nokogiri and returns, as an Array, the nodes
    # matching any selector in INTERACTABLE. Refs, fingerprints and the wire
    # format are handled by later stages.
    class Extractor
      INTERACTABLE = %w[a button input select textarea
                        [role=button] [role=link] [role=menuitem]].freeze

      def call(html)
        selector = INTERACTABLE.join(",")
        document = Nokogiri::HTML(html)
        document.css(selector).to_a
      end
    end
  end
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "browserctl/snapshot/ref"
|
|
4
|
+
|
|
5
|
+
module Browserctl
  module Snapshot
    # Describes an element by signals other than its selector, so the replay
    # layer can re-identify it when the recorded selector no longer resolves.
    #
    # A fingerprint is a Hash of the form:
    #   {
    #     text:      accessible name (see #accessible_name),
    #     role:      explicit role attribute, else implicit role from the tag,
    #     neighbors: ["<tag>:<short text>", ...] for nearby siblings,
    #     position:  { index: <index among parent's children>, depth: <int> }
    #   }
    class Fingerprint
      NEIGHBOR_RADIUS = 2 # siblings to capture on each side
      NEIGHBOR_TEXT_LEN = 40

      def initialize(ref_deriver: RefDeriver.new)
        @ref_deriver = ref_deriver
      end

      def build(node)
        {
          text: accessible_name(node),
          role: role(node),
          neighbors: neighbors(node),
          position: position(node)
        }
      end

      private

      # A non-empty role attribute wins; otherwise the tag's implicit role, or
      # the tag name itself when no implicit role is known.
      def role(node)
        declared = node["role"]
        if declared && !declared.empty?
          declared
        else
          RefDeriver::IMPLICIT_ROLE.fetch(node.name, node.name)
        end
      end

      # First non-blank of aria-label, placeholder, alt, title (stripped);
      # failing that, the node's stripped text cut to 80 characters.
      def accessible_name(node)
        labelled_by = %w[aria-label placeholder alt title].find do |attr|
          !node[attr].to_s.strip.empty?
        end
        return node[labelled_by].strip if labelled_by

        node.text.to_s.strip.slice(0, 80)
      end

      # Signals for up to NEIGHBOR_RADIUS children of the parent on each side
      # of the node, skipping the node itself and siblings with no text.
      def neighbors(node)
        parent = node.parent
        return [] unless parent.respond_to?(:children)

        siblings = parent.children.to_a
        at = siblings.index(node) || 0
        first = [at - NEIGHBOR_RADIUS, 0].max
        nearby = siblings[first...(at + NEIGHBOR_RADIUS + 1)] || []

        nearby.filter_map do |sibling|
          next if sibling == node || !sibling.respond_to?(:name)

          signal = neighbor_signal(sibling)
          signal unless signal.empty?
        end
      end

      # "<tag>:<text>" with whitespace collapsed and the text cut to
      # NEIGHBOR_TEXT_LEN, or "" when the sibling has no text.
      def neighbor_signal(node)
        squeezed = node.text.to_s.strip.gsub(/\s+/, " ")
        short = squeezed.slice(0, NEIGHBOR_TEXT_LEN)
        return "" if short.empty?

        "#{node.name}:#{short}"
      end

      def position(node)
        parent = node.parent
        index = 0
        index = parent.children.to_a.index(node) || 0 if parent.respond_to?(:children)
        { index: index, depth: depth(node) }
      end

      # Number of ancestors between the node and the "document" node
      # (exclusive), or up to the point where the parent chain ends.
      def depth(node)
        levels = 0
        ancestor = node.parent
        until !ancestor.respond_to?(:name) || ancestor.name == "document"
          levels += 1
          ancestor = ancestor.parent
        end
        levels
      end
    end
  end
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
|
|
5
|
+
module Browserctl
  module Snapshot
    # Derives an element ref from semantic and structural signals rather than
    # from document order.
    #
    # The ref is "e" followed by the first HASH_LEN hex characters of the
    # SHA-256 of these values joined with "|":
    #   - role: the role attribute when non-empty, else the implicit role for
    #     the tag, else the tag name
    #   - accessible name: first non-blank of aria-label, placeholder, alt,
    #     title; otherwise the node's text, stripped and cut to 80 characters
    #   - tag name
    #   - parent path: ancestor tag names, outermost first, joined with ">",
    #     stopping below <html> / the document node
    #
    # Two elements with identical signals derive the same ref; callers resolve
    # such collisions within a snapshot through #disambiguate.
    class RefDeriver
      IMPLICIT_ROLE = {
        "a" => "link", "button" => "button", "input" => "textbox",
        "select" => "combobox", "textarea" => "textbox"
      }.freeze

      HASH_LEN = 7

      def derive(node)
        signal = [role(node), accessible_name(node), node.name, parent_path(node)].join("|")
        digest = Digest::SHA256.hexdigest(signal)
        "e#{digest[0, HASH_LEN]}"
      end

      # Returns `ref` itself when it is not in `taken`; otherwise the first of
      # "<ref>-2", "<ref>-3", ... that is not in `taken`.
      def disambiguate(ref, taken)
        return ref unless taken.include?(ref)

        suffix = 2.step.find { |n| !taken.include?("#{ref}-#{n}") }
        "#{ref}-#{suffix}"
      end

      private

      def role(node)
        declared = node["role"]
        if declared && !declared.empty?
          declared
        else
          IMPLICIT_ROLE.fetch(node.name, node.name)
        end
      end

      def accessible_name(node)
        labelled_by = %w[aria-label placeholder alt title].find do |attr|
          !node[attr].to_s.strip.empty?
        end
        return node[labelled_by].strip if labelled_by

        node.text.to_s.strip.slice(0, 80)
      end

      def parent_path(node)
        names = []
        ancestor = node.parent
        while ancestor.respond_to?(:name) && !%w[html document].include?(ancestor.name)
          names << ancestor.name
          ancestor = ancestor.parent
        end
        names.reverse.join(">")
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Browserctl
  module Snapshot
    # Third stage of the snapshot pipeline.
    #
    # Currently a pass-through: the entries it receives are returned as-is.
    # The class exists as an extension point where canonicalization,
    # redaction, or compression could be added later without touching the
    # extraction or annotation stages.
    class Serializer
      # Returns the very same object it was given.
      def call(entries) = entries
    end
  end
end
|