browserctl 0.9.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +45 -0
- data/README.md +1 -1
- data/bin/browserctl +45 -4
- data/lib/browserctl/client.rb +47 -3
- data/lib/browserctl/commands/cli_output.rb +16 -3
- data/lib/browserctl/commands/flow.rb +123 -0
- data/lib/browserctl/commands/state.rb +193 -0
- data/lib/browserctl/commands/workflow.rb +62 -4
- data/lib/browserctl/constants.rb +1 -1
- data/lib/browserctl/detectors/auth_required.rb +128 -0
- data/lib/browserctl/detectors.rb +2 -0
- data/lib/browserctl/errors.rb +36 -0
- data/lib/browserctl/flow.rb +215 -0
- data/lib/browserctl/flow_registry.rb +66 -0
- data/lib/browserctl/flows/stdlib/basic_auth.rb +30 -0
- data/lib/browserctl/flows/stdlib/cloudflare_solve.rb +59 -0
- data/lib/browserctl/flows/stdlib/magic_link_email.rb +28 -0
- data/lib/browserctl/flows/stdlib/oauth_github.rb +28 -0
- data/lib/browserctl/flows/stdlib/oauth_google.rb +30 -0
- data/lib/browserctl/flows/stdlib/totp_2fa.rb +61 -0
- data/lib/browserctl/recording.rb +212 -26
- data/lib/browserctl/replay/context.rb +40 -0
- data/lib/browserctl/replay/fingerprint_matcher.rb +86 -0
- data/lib/browserctl/replay/snapshot_diff.rb +51 -0
- data/lib/browserctl/replay/telemetry.rb +60 -0
- data/lib/browserctl/runner.rb +38 -4
- data/lib/browserctl/server/command_dispatcher.rb +10 -1
- data/lib/browserctl/server/handlers/interaction.rb +3 -3
- data/lib/browserctl/server/handlers/navigation.rb +33 -4
- data/lib/browserctl/server/handlers/observation.rb +43 -2
- data/lib/browserctl/server/handlers/state.rb +149 -0
- data/lib/browserctl/server/page_session.rb +9 -7
- data/lib/browserctl/server/snapshot_builder.rb +21 -45
- data/lib/browserctl/snapshot/annotator.rb +75 -0
- data/lib/browserctl/snapshot/extractor.rb +21 -0
- data/lib/browserctl/snapshot/fingerprint.rb +88 -0
- data/lib/browserctl/snapshot/ref.rb +70 -0
- data/lib/browserctl/snapshot/serializer.rb +17 -0
- data/lib/browserctl/state/bundle.rb +242 -0
- data/lib/browserctl/state/transport.rb +64 -0
- data/lib/browserctl/state/transports/file.rb +35 -0
- data/lib/browserctl/state/transports/one_password.rb +67 -0
- data/lib/browserctl/state/transports/s3.rb +42 -0
- data/lib/browserctl/state.rb +208 -0
- data/lib/browserctl/version.rb +1 -1
- data/lib/browserctl/workflow/flow_wrapper.rb +81 -0
- data/lib/browserctl/workflow/promoter.rb +96 -0
- data/lib/browserctl/workflow/promotion_ledger.rb +72 -0
- data/lib/browserctl/workflow.rb +180 -16
- metadata +32 -2
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
require_relative "../constants"
|
|
6
|
+
|
|
7
|
+
module Browserctl
  module Replay
    # Append-only JSONL log of replay drift events for offline analysis.
    # Local-only; nothing is uploaded. One line per event.
    module Telemetry
      LOG_BASENAME = "replay_drift.jsonl"

      module_function

      # Default destination of the drift log: LOG_BASENAME inside
      # Browserctl::BROWSERCTL_DIR.
      # @return [String]
      def log_path
        File.join(Browserctl::BROWSERCTL_DIR, LOG_BASENAME)
      end

      # Write each drift event from a Replay::Context as its own JSONL line.
      # @param ctx [Browserctl::Replay::Context, nil]
      # @param workflow [String] workflow name for cross-reference
      # @param path [String] override the destination (testing)
      # @return [Integer] number of events written (0 when there is nothing to
      #   write or when writing failed)
      def emit(ctx, workflow:, path: log_path)
        events = ctx&.drift_events
        return 0 if events.nil? || events.empty?

        # strftime yields the same text Time#iso8601 produces for a UTC time,
        # but works without `require "time"` (needed for #iso8601 before
        # Ruby 3.4), so a missing require cannot raise NoMethodError here.
        ts = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")

        # Serialize everything before touching the file so an unencodable
        # event never leaves a half-written batch behind.
        lines = events.map do |e|
          JSON.generate(
            event: "replay_drift",
            ts: ts,
            workflow: workflow,
            command: e.command.to_s,
            selector: e.selector,
            matched_ref: e.matched_ref,
            score: e.score,
            reason: e.reason
          )
        end

        ensure_log_file(path)
        File.open(path, "a") do |f|
          lines.each { |line| f.puts(line) }
        end
        events.size
      rescue SystemCallError, IOError, JSON::GeneratorError, EncodingError
        # Telemetry must never break a run. Besides I/O failures this also
        # covers values JSON cannot encode (NaN scores, malformed UTF-8).
        0
      end

      # Makes sure the log's directory (mode 0700) and the log file itself
      # (mode 0600) exist. An existing file is left untouched.
      # @param path [String] log file location
      def ensure_log_file(path)
        FileUtils.mkdir_p(File.dirname(path), mode: 0o700)
        # CREAT|EXCL creates the file with restrictive permissions in a single
        # step; the chmod on the open handle pins the mode to exactly 0600
        # regardless of umask. The file is never visible with wider access.
        File.open(path, File::WRONLY | File::CREAT | File::EXCL, 0o600) { |f| f.chmod(0o600) }
        nil
      rescue Errno::EEXIST
        nil
      end
    end
  end
end
|
data/lib/browserctl/runner.rb
CHANGED
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
4
|
require_relative "workflow"
|
|
5
|
+
require_relative "workflow/promotion_ledger"
|
|
5
6
|
require_relative "client"
|
|
7
|
+
require_relative "replay/telemetry"
|
|
6
8
|
|
|
7
9
|
module Browserctl
|
|
8
10
|
class Runner
|
|
@@ -14,13 +16,27 @@ module Browserctl
|
|
|
14
16
|
# Runs a named workflow with the given parameters.
|
|
15
17
|
# @param name [String] workflow name (must match /\A[a-zA-Z0-9_-]+\z/)
|
|
16
18
|
# @param params [Hash] keyword arguments passed to the workflow
|
|
17
|
-
# @
|
|
19
|
+
# @param check [Boolean] when true, attaches a Replay::Context, renders
|
|
20
|
+
# a drift report after the run, and signals drift via exit code 2.
|
|
21
|
+
# @return [Symbol] :clean (all ok, no drift), :drift (all ok, drift seen), :fail (any step failed)
|
|
18
22
|
# @raise [WorkflowError] if the name is invalid or a step fails
|
|
19
|
-
def run_workflow(name, **params)
|
|
23
|
+
def run_workflow(name, check: false, **params)
|
|
20
24
|
defn = fetch_workflow(name)
|
|
21
|
-
|
|
25
|
+
ctx = check ? Browserctl::Replay::Context.new : nil
|
|
26
|
+
begin
|
|
27
|
+
results = defn.call(params, Client.new, replay_context: ctx)
|
|
28
|
+
rescue StandardError
|
|
29
|
+
Browserctl::Workflow::PromotionLedger.record(workflow: name.to_s, verdict: :fail) if check
|
|
30
|
+
raise
|
|
31
|
+
end
|
|
22
32
|
print_results(results)
|
|
23
|
-
results
|
|
33
|
+
v = verdict(results, ctx)
|
|
34
|
+
if check
|
|
35
|
+
print_drift_report(ctx)
|
|
36
|
+
Browserctl::Replay::Telemetry.emit(ctx, workflow: name.to_s)
|
|
37
|
+
Browserctl::Workflow::PromotionLedger.record(workflow: name.to_s, verdict: v)
|
|
38
|
+
end
|
|
39
|
+
v
|
|
24
40
|
end
|
|
25
41
|
|
|
26
42
|
# Lists all registered workflows from the standard search paths.
|
|
@@ -109,6 +125,24 @@ module Browserctl
|
|
|
109
125
|
$stdout.puts " #{label} #{msg}"
|
|
110
126
|
end
|
|
111
127
|
|
|
128
|
+
# Prints a pretty-printed JSON drift summary to $stdout.
# @param ctx [#drift_events, nil] replay context; nil (or a context with no
#   event list) is reported as "no drift"
# The summary carries: whether any event was seen, how many events have the
# reason "rematch", how many have "no candidate above threshold", and every
# event converted with #to_h.
def print_drift_report(ctx)
  drift_events = ctx&.drift_events || []
  reasons = drift_events.map(&:reason)

  summary = {}
  summary[:drift] = drift_events.any?
  summary[:rematches] = reasons.count("rematch")
  summary[:unresolved] = reasons.count("no candidate above threshold")
  summary[:events] = drift_events.map(&:to_h)

  $stdout.puts JSON.pretty_generate(summary)
end
|
|
138
|
+
|
|
139
|
+
# Reduces a run to a single outcome symbol.
# @param results [Enumerable<#ok>] per-step results
# @param ctx [#drift_events, nil] replay context, if one was attached
# @return [Symbol] :fail when any step's #ok is falsy, :drift when every step
#   passed but the context holds drift events, :clean otherwise
def verdict(results, ctx)
  if results.any? { |step| !step.ok }
    :fail
  elsif ctx&.drift_events&.any?
    :drift
  else
    :clean
  end
end
|
|
145
|
+
|
|
112
146
|
def format_params(defn)
|
|
113
147
|
defn.param_defs.transform_values do |p|
|
|
114
148
|
entry = { required: p.required, secret: p.secret, default: p.default }
|
|
@@ -11,9 +11,11 @@ require_relative "handlers/devtools"
|
|
|
11
11
|
require_relative "handlers/daemon_control"
|
|
12
12
|
require_relative "handlers/storage"
|
|
13
13
|
require_relative "handlers/session"
|
|
14
|
+
require_relative "handlers/state"
|
|
14
15
|
require_relative "handlers/interaction"
|
|
15
16
|
require_relative "../detectors"
|
|
16
17
|
require_relative "../policy"
|
|
18
|
+
require_relative "../replay/snapshot_diff"
|
|
17
19
|
|
|
18
20
|
module Browserctl
|
|
19
21
|
class CommandDispatcher
|
|
@@ -26,6 +28,7 @@ module Browserctl
|
|
|
26
28
|
include Handlers::DaemonControl
|
|
27
29
|
include Handlers::Storage
|
|
28
30
|
include Handlers::Session
|
|
31
|
+
include Handlers::State
|
|
29
32
|
include Handlers::Interaction
|
|
30
33
|
|
|
31
34
|
COMMAND_MAP = {
|
|
@@ -36,6 +39,7 @@ module Browserctl
|
|
|
36
39
|
"navigate" => :cmd_navigate,
|
|
37
40
|
"wait" => :cmd_wait,
|
|
38
41
|
"snapshot" => :cmd_snapshot,
|
|
42
|
+
"auth_check" => :cmd_auth_check,
|
|
39
43
|
"evaluate" => :cmd_evaluate,
|
|
40
44
|
"fill" => :cmd_fill,
|
|
41
45
|
"click" => :cmd_click,
|
|
@@ -66,7 +70,12 @@ module Browserctl
|
|
|
66
70
|
"session_save" => :cmd_session_save,
|
|
67
71
|
"session_load" => :cmd_session_load,
|
|
68
72
|
"session_list" => :cmd_session_list,
|
|
69
|
-
"session_delete" => :cmd_session_delete
|
|
73
|
+
"session_delete" => :cmd_session_delete,
|
|
74
|
+
"state_save" => :cmd_state_save,
|
|
75
|
+
"state_load" => :cmd_state_load,
|
|
76
|
+
"state_list" => :cmd_state_list,
|
|
77
|
+
"state_info" => :cmd_state_info,
|
|
78
|
+
"state_delete" => :cmd_state_delete
|
|
70
79
|
}.freeze
|
|
71
80
|
|
|
72
81
|
SCREENSHOT_DIR = File.expand_path("~/.browserctl/screenshots").freeze
|
|
@@ -27,7 +27,7 @@ module Browserctl
|
|
|
27
27
|
"return { x: r.left + r.width / 2, y: r.top + r.height / 2 }; " \
|
|
28
28
|
"})(#{sel.to_json})"
|
|
29
29
|
)
|
|
30
|
-
return { error: "selector not found: #{sel}" } unless coords
|
|
30
|
+
return { error: "selector not found: #{sel}", code: "selector_not_found" } unless coords
|
|
31
31
|
|
|
32
32
|
session.page.mouse.move(x: coords["x"], y: coords["y"])
|
|
33
33
|
{ ok: true }
|
|
@@ -43,7 +43,7 @@ module Browserctl
|
|
|
43
43
|
return sel if sel.is_a?(Hash)
|
|
44
44
|
|
|
45
45
|
el = session.page.at_css(sel)
|
|
46
|
-
return { error: "selector not found: #{sel}" } unless el
|
|
46
|
+
return { error: "selector not found: #{sel}", code: "selector_not_found" } unless el
|
|
47
47
|
|
|
48
48
|
el.select_file(path)
|
|
49
49
|
{ ok: true }
|
|
@@ -56,7 +56,7 @@ module Browserctl
|
|
|
56
56
|
return sel if sel.is_a?(Hash)
|
|
57
57
|
|
|
58
58
|
el = session.page.at_css(sel)
|
|
59
|
-
return { error: "selector not found: #{sel}" } unless el
|
|
59
|
+
return { error: "selector not found: #{sel}", code: "selector_not_found" } unless el
|
|
60
60
|
|
|
61
61
|
el.evaluate(
|
|
62
62
|
"this.value = #{req[:value].to_json}; " \
|
|
@@ -33,7 +33,8 @@ module Browserctl
|
|
|
33
33
|
sel = resolve_selector_from(session, req)
|
|
34
34
|
return sel if sel.is_a?(Hash)
|
|
35
35
|
|
|
36
|
-
type_into(session.page, sel, req[:value])
|
|
36
|
+
result = type_into(session.page, sel, req[:value])
|
|
37
|
+
enrich_with_recording_metadata(result, session, sel, req)
|
|
37
38
|
end
|
|
38
39
|
end
|
|
39
40
|
|
|
@@ -42,17 +43,45 @@ module Browserctl
|
|
|
42
43
|
sel = resolve_selector_from(session, req)
|
|
43
44
|
return sel if sel.is_a?(Hash)
|
|
44
45
|
|
|
45
|
-
click_element(session.page, sel)
|
|
46
|
+
result = click_element(session.page, sel)
|
|
47
|
+
enrich_with_recording_metadata(result, session, sel, req)
|
|
46
48
|
end
|
|
47
49
|
end
|
|
48
50
|
|
|
51
|
+
# Adds ref / fingerprint / snapshot_id / postcondition_hint to a successful
# click/fill response. Recording uses these to build a self-healing log.
# When req[:capture_post_snapshot] is true, also takes a fresh snapshot
# and attaches its digest so workflow run --check can diff DOM state
# against the recorded baseline.
#
# @param result [Hash] handler response; returned untouched unless result[:ok]
# @param session [PageSession] supplies ref_registry, fingerprint_index,
#   snapshot_id and the page
# @param selector [String] CSS selector the action was performed with
# @param req [Hash] request; :ref and :capture_post_snapshot are read
# @return [Hash] result merged with the metadata, nil-valued keys dropped
def enrich_with_recording_metadata(result, session, selector, req)
  return result unless result[:ok]

  # Several snapshot entries can share one selector. The action itself goes
  # through at_css, which hits the first match, so report the FIRST ref
  # registered for the selector. Hash#key does that (and avoids building a
  # throwaway inverted hash, which would keep the LAST ref instead).
  ref = req[:ref] || session.ref_registry.key(selector)
  fp = (ref && session.fingerprint_index[ref]) || session.fingerprint_index[selector]
  enriched = result.merge(
    ref: ref,
    fingerprint: fp,
    snapshot_id: session.snapshot_id,
    postcondition_hint: { url: session.page.current_url }
  )
  enriched[:post_snapshot_digest] = capture_post_snapshot_digest(session) if req[:capture_post_snapshot]
  # compact drops ref/fingerprint/digest entries that could not be resolved.
  enriched.compact
end
|
|
70
|
+
|
|
71
|
+
# Builds a fresh snapshot of the session's page with @snapshot_builder and
# returns its Replay::SnapshotDiff digest.
# Best-effort: any StandardError raised along the way yields nil instead.
# @param session [#page]
# @return [Object, nil] the digest, or nil when it could not be produced
def capture_post_snapshot_digest(session)
  begin
    fresh_snapshot = @snapshot_builder.call(session.page)
    Browserctl::Replay::SnapshotDiff.digest(fresh_snapshot)
  rescue StandardError
    nil
  end
end
|
|
77
|
+
|
|
49
78
|
def cmd_url(req)
|
|
50
79
|
with_page(req[:name]) { |session| { ok: true, url: session.page.current_url } }
|
|
51
80
|
end
|
|
52
81
|
|
|
53
82
|
def type_into(page, selector, value)
|
|
54
83
|
el = page.at_css(selector)
|
|
55
|
-
return { error: "selector not found: #{selector}" } unless el
|
|
84
|
+
return { error: "selector not found: #{selector}", code: "selector_not_found" } unless el
|
|
56
85
|
|
|
57
86
|
el.focus
|
|
58
87
|
el.evaluate("this.select()")
|
|
@@ -62,7 +91,7 @@ module Browserctl
|
|
|
62
91
|
|
|
63
92
|
def click_element(page, selector)
|
|
64
93
|
el = page.at_css(selector)
|
|
65
|
-
return { error: "selector not found: #{selector}" } unless el
|
|
94
|
+
return { error: "selector not found: #{selector}", code: "selector_not_found" } unless el
|
|
66
95
|
|
|
67
96
|
# Use the DOM native click() so JS-only event listeners fire.
|
|
68
97
|
# CDP mouse simulation (el.click) dispatches events at screen coordinates
|
|
@@ -12,6 +12,30 @@ module Browserctl
|
|
|
12
12
|
with_page(req[:name]) { |session| take_snapshot(session, req[:format], req[:diff]) }
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
+
# Runs the auth_required detector against the named page.
#
# Responds with `{ ok: true, auth_required: false }` when the detector does
# not trigger; otherwise with the response built by AuthRequiredError
# (constructed from the detector's reason and suggested_flow plus
# req[:state]).
#
# The page's cookies are handed to the detector only when
# req[:include_cookies] is set; req[:suggested_flow] is always forwarded.
def cmd_auth_check(req)
  with_page(req[:name]) do |session|
    jar = req[:include_cookies] ? session.page.cookies.all.values.map(&:to_h) : nil
    detection = Browserctl::Detectors.auth_required(
      session.page,
      cookies: jar,
      suggested_flow: req[:suggested_flow]
    )

    if detection.triggered
      Browserctl::AuthRequiredError.new(
        detection.reason,
        state: req[:state],
        suggested_flow: detection.suggested_flow,
        reason: detection.reason
      ).to_response
    else
      { ok: true, auth_required: false }
    end
  end
end
|
|
38
|
+
|
|
15
39
|
def take_snapshot(session, format, diff)
|
|
16
40
|
nonce = SecureRandom.hex(8)
|
|
17
41
|
challenge = Detectors.cloudflare?(session.page)
|
|
@@ -20,15 +44,32 @@ module Browserctl
|
|
|
20
44
|
|
|
21
45
|
snapshot = @snapshot_builder.call(session.page)
|
|
22
46
|
registry = snapshot.to_h { |el| [el[:ref], el[:selector]] }
|
|
47
|
+
fp_index = build_fingerprint_index(snapshot)
|
|
23
48
|
|
|
24
49
|
prev = session.prev_snapshot
|
|
25
|
-
session.ref_registry
|
|
26
|
-
session.
|
|
50
|
+
session.ref_registry = registry
|
|
51
|
+
session.fingerprint_index = fp_index
|
|
52
|
+
session.snapshot_id = nonce
|
|
53
|
+
session.prev_snapshot = snapshot
|
|
27
54
|
result = diff && prev ? compute_diff(prev, snapshot) : snapshot
|
|
28
55
|
|
|
29
56
|
{ ok: true, snapshot: result, challenge: challenge, nonce: nonce }
|
|
30
57
|
end
|
|
31
58
|
|
|
59
|
+
# Builds a lookup from both the ref and the selector of every snapshot entry
# to that entry's fingerprint.
# Entries without a fingerprint are skipped, as are nil refs/selectors. When a
# key occurs more than once, the later entry's fingerprint replaces the
# earlier one.
# @param snapshot [Enumerable<Hash>] entries with :ref, :selector, :fingerprint
# @return [Hash] ref-or-selector => fingerprint
def build_fingerprint_index(snapshot)
  snapshot.each_with_object({}) do |element, lookup|
    print = element[:fingerprint]
    next unless print

    [element[:ref], element[:selector]].each do |key|
      lookup[key] = print if key
    end
  end
end
|
|
72
|
+
|
|
32
73
|
def compute_diff(prev, current)
|
|
33
74
|
prev_by_sel = prev.to_h { |el| [el[:selector], el] }
|
|
34
75
|
current.reject do |el|
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require_relative "../../state"
|
|
5
|
+
|
|
6
|
+
module Browserctl
|
|
7
|
+
class CommandDispatcher
|
|
8
|
+
module Handlers
|
|
9
|
+
# Top-level state management — collapses cookies + localStorage +
|
|
10
|
+
# sessionStorage into a single `.bctl` bundle. See lib/browserctl/state.rb.
|
|
11
|
+
module State
|
|
12
|
+
private
|
|
13
|
+
|
|
14
|
+
def cmd_state_save(req)
|
|
15
|
+
first_session = @global_mutex.synchronize { @pages.values.first }
|
|
16
|
+
return { error: "no open pages — open a page before saving state" } unless first_session
|
|
17
|
+
|
|
18
|
+
payload, captured_origins = capture_state_payload
|
|
19
|
+
manifest = Browserctl::State.save(
|
|
20
|
+
req[:name],
|
|
21
|
+
payload: payload,
|
|
22
|
+
origins: req[:origins] || captured_origins,
|
|
23
|
+
flow: req[:flow],
|
|
24
|
+
flow_version: req[:flow_version],
|
|
25
|
+
passphrase: req[:passphrase]
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
{
|
|
29
|
+
ok: true,
|
|
30
|
+
path: Browserctl::State.path(req[:name]),
|
|
31
|
+
origins: manifest[:origins],
|
|
32
|
+
cookies: payload[:cookies].length,
|
|
33
|
+
encrypted: manifest[:encrypted]
|
|
34
|
+
}
|
|
35
|
+
rescue Browserctl::Error, ArgumentError => e
|
|
36
|
+
{ error: e.message }
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def cmd_state_load(req)
|
|
40
|
+
data = Browserctl::State.load(req[:name], passphrase: req[:passphrase])
|
|
41
|
+
target = @global_mutex.synchronize { @pages.values.first }
|
|
42
|
+
return { error: "no open pages — open a page before loading state" } unless target
|
|
43
|
+
|
|
44
|
+
cookies = pluck(data[:payload], :cookies, default: [])
|
|
45
|
+
|
|
46
|
+
unless req[:skip_auth_check]
|
|
47
|
+
auth = Browserctl::Detectors.auth_required(
|
|
48
|
+
target.page, cookies: cookies, suggested_flow: data[:manifest][:flow]
|
|
49
|
+
)
|
|
50
|
+
if auth.triggered
|
|
51
|
+
return Browserctl::AuthRequiredError.new(
|
|
52
|
+
auth.reason,
|
|
53
|
+
state: req[:name],
|
|
54
|
+
suggested_flow: auth.suggested_flow,
|
|
55
|
+
reason: auth.reason
|
|
56
|
+
).to_response
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
restore_state_cookies(target, cookies)
|
|
61
|
+
ls_count = restore_local_storage(pluck(data[:payload], :local_storage, default: {}))
|
|
62
|
+
|
|
63
|
+
{
|
|
64
|
+
ok: true,
|
|
65
|
+
cookies: cookies.length,
|
|
66
|
+
local_storage_keys: ls_count,
|
|
67
|
+
origins: data[:manifest][:origins]
|
|
68
|
+
}
|
|
69
|
+
rescue Browserctl::State::Bundle::BundleError, Browserctl::Error, ArgumentError, JSON::ParserError => e
|
|
70
|
+
{ error: e.message }
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def pluck(hash, sym, default:)
|
|
74
|
+
hash[sym] || hash[sym.to_s] || default
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def restore_state_cookies(target, cookies)
|
|
78
|
+
cookies.each do |raw|
|
|
79
|
+
c = raw.transform_keys(&:to_sym)
|
|
80
|
+
target.page.cookies.set(**c.slice(:name, :value, :domain, :path))
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def cmd_state_list(_req)
|
|
85
|
+
{ ok: true, state: Browserctl::State.all }
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def cmd_state_info(req)
|
|
89
|
+
{ ok: true, info: Browserctl::State.info(req[:name]) }
|
|
90
|
+
rescue Browserctl::State::Bundle::BundleError, Browserctl::Error, ArgumentError => e
|
|
91
|
+
{ error: e.message }
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def cmd_state_delete(req)
|
|
95
|
+
Browserctl::State.delete(req[:name])
|
|
96
|
+
{ ok: true }
|
|
97
|
+
rescue ArgumentError => e
|
|
98
|
+
{ error: e.message }
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def capture_state_payload
|
|
102
|
+
first = @global_mutex.synchronize { @pages.values.first }
|
|
103
|
+
cookies = first.page.cookies.all.values.map(&:to_h)
|
|
104
|
+
|
|
105
|
+
local_storage = {}
|
|
106
|
+
session_storage = {}
|
|
107
|
+
captured_origins = []
|
|
108
|
+
|
|
109
|
+
@global_mutex.synchronize { @pages.dup }.each_value do |session|
|
|
110
|
+
session.mutex.synchronize do
|
|
111
|
+
origin = session.page.evaluate("location.origin")
|
|
112
|
+
ls_str = session.page.evaluate("JSON.stringify({...localStorage})") || "{}"
|
|
113
|
+
ss_str = session.page.evaluate("JSON.stringify({...sessionStorage})") || "{}"
|
|
114
|
+
local_storage[origin] = JSON.parse(ls_str)
|
|
115
|
+
session_storage[origin] = JSON.parse(ss_str)
|
|
116
|
+
captured_origins << origin
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
payload = {
|
|
121
|
+
cookies: cookies,
|
|
122
|
+
local_storage: local_storage,
|
|
123
|
+
session_storage: session_storage
|
|
124
|
+
}
|
|
125
|
+
[payload, captured_origins.uniq]
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def restore_local_storage(local_storage)
|
|
129
|
+
count = 0
|
|
130
|
+
local_storage.each do |origin, keys|
|
|
131
|
+
next if keys.nil? || keys.empty?
|
|
132
|
+
|
|
133
|
+
tmp_page = @driver.create_page
|
|
134
|
+
begin
|
|
135
|
+
tmp_page.go_to(origin.to_s)
|
|
136
|
+
keys.each do |k, v|
|
|
137
|
+
tmp_page.evaluate("localStorage.setItem(#{k.to_json}, #{v.to_json})")
|
|
138
|
+
count += 1
|
|
139
|
+
end
|
|
140
|
+
ensure
|
|
141
|
+
tmp_page.close
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
count
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
@@ -3,15 +3,17 @@
|
|
|
3
3
|
module Browserctl
|
|
4
4
|
class PageSession
|
|
5
5
|
attr_reader :page, :mutex, :pause_cv
|
|
6
|
-
attr_accessor :ref_registry, :prev_snapshot
|
|
6
|
+
attr_accessor :ref_registry, :prev_snapshot, :fingerprint_index, :snapshot_id
|
|
7
7
|
|
|
8
8
|
def initialize(page)
|
|
9
|
-
@page
|
|
10
|
-
@mutex
|
|
11
|
-
@pause_cv
|
|
12
|
-
@ref_registry
|
|
13
|
-
@
|
|
14
|
-
@
|
|
9
|
+
@page = page
|
|
10
|
+
@mutex = Mutex.new
|
|
11
|
+
@pause_cv = ConditionVariable.new
|
|
12
|
+
@ref_registry = {}
|
|
13
|
+
@fingerprint_index = {}
|
|
14
|
+
@snapshot_id = nil
|
|
15
|
+
@prev_snapshot = nil
|
|
16
|
+
@paused = false
|
|
15
17
|
end
|
|
16
18
|
|
|
17
19
|
def paused? = @paused
|
|
@@ -1,55 +1,31 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "
|
|
3
|
+
require "browserctl/snapshot/extractor"
|
|
4
|
+
require "browserctl/snapshot/annotator"
|
|
5
|
+
require "browserctl/snapshot/serializer"
|
|
4
6
|
|
|
5
7
|
module Browserctl
|
|
8
|
+
# Orchestrates the snapshot pipeline:
|
|
9
|
+
#
|
|
10
|
+
# page.body ──Extractor──▶ [nodes]
|
|
11
|
+
# ──Annotator──▶ [entries with ref + fingerprint]
|
|
12
|
+
# ──Serializer─▶ wire-shape array
|
|
13
|
+
#
|
|
14
|
+
# Each stage is independently testable. Inject alternates via the keyword
|
|
15
|
+
# args for tests that want to isolate one stage.
|
|
6
16
|
class SnapshotBuilder
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
ref = 0
|
|
14
|
-
doc.css(INTERACTABLE.join(",")).map { |el| element_entry(el, ref += 1) }
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
private
|
|
18
|
-
|
|
19
|
-
def element_entry(elem, ref)
|
|
20
|
-
{ ref: "e#{ref}", tag: elem.name, text: elem.text.strip.slice(0, 80),
|
|
21
|
-
selector: css_path(elem), attrs: element_attrs(elem) }
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
def element_attrs(elem)
|
|
25
|
-
elem.attributes.transform_values(&:value).slice(*ATTRS)
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
def css_path(node)
|
|
29
|
-
ancestors_until_html(node).map { |n| path_segment(n) }.join(" > ")
|
|
17
|
+
def initialize(extractor: Snapshot::Extractor.new,
|
|
18
|
+
annotator: Snapshot::Annotator.new,
|
|
19
|
+
serializer: Snapshot::Serializer.new)
|
|
20
|
+
@extractor = extractor
|
|
21
|
+
@annotator = annotator
|
|
22
|
+
@serializer = serializer
|
|
30
23
|
end
|
|
31
24
|
|
|
32
|
-
def
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
node = node.parent
|
|
37
|
-
end
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
def path_segment(node)
|
|
42
|
-
node.name + id_fragment(node) + class_fragment(node)
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
def id_fragment(node)
|
|
46
|
-
(id = node["id"]) && !id.empty? ? "##{id}" : ""
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def class_fragment(node)
|
|
50
|
-
return "" if node["id"] && !node["id"].empty?
|
|
51
|
-
|
|
52
|
-
(klass = node["class"]&.split&.first) ? ".#{klass}" : ""
|
|
25
|
+
def call(page)
|
|
26
|
+
nodes = @extractor.call(page.body)
|
|
27
|
+
entries = @annotator.call(nodes)
|
|
28
|
+
@serializer.call(entries)
|
|
53
29
|
end
|
|
54
30
|
end
|
|
55
31
|
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "browserctl/snapshot/ref"
|
|
4
|
+
require "browserctl/snapshot/fingerprint"
|
|
5
|
+
|
|
6
|
+
module Browserctl
  module Snapshot
    # Stage 2 of the snapshot pipeline.
    #
    # Takes the list of interactable nodes from Extractor and produces
    # element entries with stable refs, semantic metadata, a CSS selector
    # path, and a fingerprint. Each entry is a plain Hash.
    class Annotator
      ATTRS = %w[type name placeholder href aria-label role].freeze

      # Values that may be written verbatim after `#` or `.` in a selector.
      # Anything else (e.g. `:r1:`, `md:flex`, `w-1/2`, a leading digit) would
      # form an invalid or differently-parsed selector and is emitted as a
      # quoted attribute match instead.
      SAFE_IDENT = /\A[A-Za-z_][A-Za-z0-9_-]*\z/
      private_constant :SAFE_IDENT

      # @param ref_deriver [#derive, #disambiguate] produces a ref per node
      # @param fingerprint [#build] produces a fingerprint per node
      def initialize(ref_deriver: RefDeriver.new, fingerprint: Fingerprint.new)
        @ref_deriver = ref_deriver
        @fingerprint = fingerprint
      end

      # @param nodes [Array] interactable nodes, in document order
      # @return [Array<Hash>] one entry per node with :ref, :tag, :text,
      #   :selector, :attrs and :fingerprint
      def call(nodes)
        taken = {}
        nodes.map do |node|
          # Refs already handed out are passed along so the deriver can
          # return one that is unique within this snapshot.
          ref = @ref_deriver.disambiguate(@ref_deriver.derive(node), taken)
          taken[ref] = true
          entry(node, ref)
        end
      end

      private

      def entry(node, ref)
        {
          ref: ref,
          tag: node.name,
          text: node.text.strip.slice(0, 80), # stripped, capped at 80 chars
          selector: css_path(node),
          attrs: attrs(node),
          fingerprint: @fingerprint.build(node)
        }
      end

      # Only the attributes listed in ATTRS are kept.
      def attrs(node)
        node.attributes.transform_values(&:value).slice(*ATTRS)
      end

      # Child-combinator path from the outermost ancestor below <html> down
      # to the node itself.
      def css_path(node)
        ancestors_until_html(node).map { |n| segment(n) }.join(" > ")
      end

      # The node and its ancestors, outermost first, stopping before <html>.
      def ancestors_until_html(node)
        [].tap do |acc|
          while node && node.name != "html"
            acc.unshift(node)
            node = node.parent
          end
        end
      end

      def segment(node)
        node.name + id_fragment(node) + class_fragment(node)
      end

      # `#id` for plain identifiers, `[id="..."]` otherwise, "" without an id.
      def id_fragment(node)
        id = node["id"]
        return "" if id.nil? || id.empty?

        SAFE_IDENT.match?(id) ? "##{id}" : attribute_match("id", "=", id)
      end

      # First class only, and only when the node has no id: `.klass` for plain
      # identifiers, `[class~="..."]` otherwise.
      def class_fragment(node)
        return "" if node["id"] && !node["id"].empty?

        klass = node["class"]&.split&.first
        return "" unless klass

        SAFE_IDENT.match?(klass) ? ".#{klass}" : attribute_match("class", "~=", klass)
      end

      # Quoted attribute selector; backslashes and double quotes in the value
      # are backslash-escaped so the string literal stays well-formed.
      def attribute_match(attr, operator, value)
        escaped = value.gsub(/["\\]/) { |ch| "\\#{ch}" }
        %([#{attr}#{operator}"#{escaped}"])
      end
    end
  end
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
|
|
5
|
+
module Browserctl
  module Snapshot
    # Stage 1 of the snapshot pipeline.
    #
    # Parses raw HTML with Nokogiri and hands back the nodes matching any of
    # the INTERACTABLE selectors. Refs, fingerprints and the wire format are
    # the business of later stages.
    class Extractor
      INTERACTABLE = %w[a button input select textarea
                        [role=button] [role=link] [role=menuitem]].freeze

      # @param html [String] raw page markup
      # @return [Array] matching Nokogiri nodes
      def call(html)
        document = Nokogiri::HTML(html)
        selector_group = INTERACTABLE.join(",")
        document.css(selector_group).to_a
      end
    end
  end
end
|