browserctl 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +45 -0
  3. data/README.md +1 -1
  4. data/bin/browserctl +45 -4
  5. data/lib/browserctl/client.rb +47 -3
  6. data/lib/browserctl/commands/cli_output.rb +16 -3
  7. data/lib/browserctl/commands/flow.rb +123 -0
  8. data/lib/browserctl/commands/state.rb +193 -0
  9. data/lib/browserctl/commands/workflow.rb +62 -4
  10. data/lib/browserctl/constants.rb +1 -1
  11. data/lib/browserctl/detectors/auth_required.rb +128 -0
  12. data/lib/browserctl/detectors.rb +2 -0
  13. data/lib/browserctl/errors.rb +36 -0
  14. data/lib/browserctl/flow.rb +215 -0
  15. data/lib/browserctl/flow_registry.rb +66 -0
  16. data/lib/browserctl/flows/stdlib/basic_auth.rb +30 -0
  17. data/lib/browserctl/flows/stdlib/cloudflare_solve.rb +59 -0
  18. data/lib/browserctl/flows/stdlib/magic_link_email.rb +28 -0
  19. data/lib/browserctl/flows/stdlib/oauth_github.rb +28 -0
  20. data/lib/browserctl/flows/stdlib/oauth_google.rb +30 -0
  21. data/lib/browserctl/flows/stdlib/totp_2fa.rb +61 -0
  22. data/lib/browserctl/recording.rb +212 -26
  23. data/lib/browserctl/replay/context.rb +40 -0
  24. data/lib/browserctl/replay/fingerprint_matcher.rb +86 -0
  25. data/lib/browserctl/replay/snapshot_diff.rb +51 -0
  26. data/lib/browserctl/replay/telemetry.rb +60 -0
  27. data/lib/browserctl/runner.rb +38 -4
  28. data/lib/browserctl/server/command_dispatcher.rb +10 -1
  29. data/lib/browserctl/server/handlers/interaction.rb +3 -3
  30. data/lib/browserctl/server/handlers/navigation.rb +33 -4
  31. data/lib/browserctl/server/handlers/observation.rb +43 -2
  32. data/lib/browserctl/server/handlers/state.rb +149 -0
  33. data/lib/browserctl/server/page_session.rb +9 -7
  34. data/lib/browserctl/server/snapshot_builder.rb +21 -45
  35. data/lib/browserctl/snapshot/annotator.rb +75 -0
  36. data/lib/browserctl/snapshot/extractor.rb +21 -0
  37. data/lib/browserctl/snapshot/fingerprint.rb +88 -0
  38. data/lib/browserctl/snapshot/ref.rb +70 -0
  39. data/lib/browserctl/snapshot/serializer.rb +17 -0
  40. data/lib/browserctl/state/bundle.rb +242 -0
  41. data/lib/browserctl/state/transport.rb +64 -0
  42. data/lib/browserctl/state/transports/file.rb +35 -0
  43. data/lib/browserctl/state/transports/one_password.rb +67 -0
  44. data/lib/browserctl/state/transports/s3.rb +42 -0
  45. data/lib/browserctl/state.rb +208 -0
  46. data/lib/browserctl/version.rb +1 -1
  47. data/lib/browserctl/workflow/flow_wrapper.rb +81 -0
  48. data/lib/browserctl/workflow/promoter.rb +96 -0
  49. data/lib/browserctl/workflow/promotion_ledger.rb +72 -0
  50. data/lib/browserctl/workflow.rb +180 -16
  51. metadata +32 -2
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
require "json"
require "fileutils"
require "time"
require_relative "../constants"
6
+
7
module Browserctl
  module Replay
    # Append-only JSONL log of replay drift events for offline analysis.
    # Local-only; nothing is uploaded. One line per event.
    module Telemetry
      LOG_BASENAME = "replay_drift.jsonl"

      module_function

      # Default destination of the drift log.
      # @return [String] LOG_BASENAME joined onto Browserctl::BROWSERCTL_DIR
      def log_path
        File.join(Browserctl::BROWSERCTL_DIR, LOG_BASENAME)
      end

      # Write each drift event from a Replay::Context as its own JSONL line.
      # @param ctx [Browserctl::Replay::Context, nil]
      # @param workflow [String] workflow name for cross-reference
      # @param path [String] override the destination (testing)
      # @return [Integer] number of events written; 0 when there is nothing
      #   to write or when writing fails
      def emit(ctx, workflow:, path: log_path)
        events = ctx&.drift_events
        return 0 if events.nil? || events.empty?

        ts = Time.now.utc.iso8601
        # Serialize the whole batch before touching the file so an event that
        # cannot be encoded never leaves a partially written batch behind.
        lines = events.map do |e|
          JSON.generate(
            event: "replay_drift",
            ts: ts,
            workflow: workflow,
            command: e.command.to_s,
            selector: e.selector,
            matched_ref: e.matched_ref,
            score: e.score,
            reason: e.reason
          )
        end

        ensure_log_file(path)
        File.open(path, "a", 0o600) { |f| f.puts(lines) }
        events.size
      rescue SystemCallError, IOError, JSON::JSONError, EncodingError
        # Telemetry must never break a run: filesystem failures and events
        # that cannot be serialized are both swallowed.
        0
      end

      # Makes sure the log's parent directory and the log file exist.
      # The directory is created with mode 0700 and a missing file is created
      # with mode 0600 in a single step, so it never exists with wider
      # permissions. An existing file is left untouched.
      # @param path [String] log file location
      # @return [void]
      def ensure_log_file(path)
        FileUtils.mkdir_p(File.dirname(path), mode: 0o700)
        return if File.exist?(path)

        File.open(path, File::WRONLY | File::CREAT, 0o600) {}
        nil
      end
    end
  end
end
@@ -2,7 +2,9 @@
2
2
 
3
3
  require "json"
4
4
  require_relative "workflow"
5
+ require_relative "workflow/promotion_ledger"
5
6
  require_relative "client"
7
+ require_relative "replay/telemetry"
6
8
 
7
9
  module Browserctl
8
10
  class Runner
@@ -14,13 +16,27 @@ module Browserctl
14
16
  # Runs a named workflow with the given parameters.
15
17
  # @param name [String] workflow name (must match /\A[a-zA-Z0-9_-]+\z/)
16
18
  # @param params [Hash] keyword arguments passed to the workflow
17
- # @return [Boolean] true if all steps succeeded
19
+ # @param check [Boolean] when true, attaches a Replay::Context, renders
20
+ # a drift report after the run, and signals drift via exit code 2.
21
+ # @return [Symbol] :clean (all ok, no drift), :drift (all ok, drift seen), :fail (any step failed)
18
22
  # @raise [WorkflowError] if the name is invalid or a step fails
19
- def run_workflow(name, **params)
23
+ def run_workflow(name, check: false, **params)
20
24
  defn = fetch_workflow(name)
21
- results = defn.call(params, Client.new)
25
+ ctx = check ? Browserctl::Replay::Context.new : nil
26
+ begin
27
+ results = defn.call(params, Client.new, replay_context: ctx)
28
+ rescue StandardError
29
+ Browserctl::Workflow::PromotionLedger.record(workflow: name.to_s, verdict: :fail) if check
30
+ raise
31
+ end
22
32
  print_results(results)
23
- results.all?(&:ok)
33
+ v = verdict(results, ctx)
34
+ if check
35
+ print_drift_report(ctx)
36
+ Browserctl::Replay::Telemetry.emit(ctx, workflow: name.to_s)
37
+ Browserctl::Workflow::PromotionLedger.record(workflow: name.to_s, verdict: v)
38
+ end
39
+ v
24
40
  end
25
41
 
26
42
  # Lists all registered workflows from the standard search paths.
@@ -109,6 +125,24 @@ module Browserctl
109
125
  $stdout.puts " #{label} #{msg}"
110
126
  end
111
127
 
128
+ def print_drift_report(ctx)
129
+ events = ctx&.drift_events || []
130
+ report = {
131
+ drift: events.any?,
132
+ rematches: events.count { |e| e.reason == "rematch" },
133
+ unresolved: events.count { |e| e.reason == "no candidate above threshold" },
134
+ events: events.map(&:to_h)
135
+ }
136
+ $stdout.puts JSON.pretty_generate(report)
137
+ end
138
+
139
+ def verdict(results, ctx)
140
+ return :fail unless results.all?(&:ok)
141
+ return :drift if ctx&.drift_events&.any?
142
+
143
+ :clean
144
+ end
145
+
112
146
  def format_params(defn)
113
147
  defn.param_defs.transform_values do |p|
114
148
  entry = { required: p.required, secret: p.secret, default: p.default }
@@ -11,9 +11,11 @@ require_relative "handlers/devtools"
11
11
  require_relative "handlers/daemon_control"
12
12
  require_relative "handlers/storage"
13
13
  require_relative "handlers/session"
14
+ require_relative "handlers/state"
14
15
  require_relative "handlers/interaction"
15
16
  require_relative "../detectors"
16
17
  require_relative "../policy"
18
+ require_relative "../replay/snapshot_diff"
17
19
 
18
20
  module Browserctl
19
21
  class CommandDispatcher
@@ -26,6 +28,7 @@ module Browserctl
26
28
  include Handlers::DaemonControl
27
29
  include Handlers::Storage
28
30
  include Handlers::Session
31
+ include Handlers::State
29
32
  include Handlers::Interaction
30
33
 
31
34
  COMMAND_MAP = {
@@ -36,6 +39,7 @@ module Browserctl
36
39
  "navigate" => :cmd_navigate,
37
40
  "wait" => :cmd_wait,
38
41
  "snapshot" => :cmd_snapshot,
42
+ "auth_check" => :cmd_auth_check,
39
43
  "evaluate" => :cmd_evaluate,
40
44
  "fill" => :cmd_fill,
41
45
  "click" => :cmd_click,
@@ -66,7 +70,12 @@ module Browserctl
66
70
  "session_save" => :cmd_session_save,
67
71
  "session_load" => :cmd_session_load,
68
72
  "session_list" => :cmd_session_list,
69
- "session_delete" => :cmd_session_delete
73
+ "session_delete" => :cmd_session_delete,
74
+ "state_save" => :cmd_state_save,
75
+ "state_load" => :cmd_state_load,
76
+ "state_list" => :cmd_state_list,
77
+ "state_info" => :cmd_state_info,
78
+ "state_delete" => :cmd_state_delete
70
79
  }.freeze
71
80
 
72
81
  SCREENSHOT_DIR = File.expand_path("~/.browserctl/screenshots").freeze
@@ -27,7 +27,7 @@ module Browserctl
27
27
  "return { x: r.left + r.width / 2, y: r.top + r.height / 2 }; " \
28
28
  "})(#{sel.to_json})"
29
29
  )
30
- return { error: "selector not found: #{sel}" } unless coords
30
+ return { error: "selector not found: #{sel}", code: "selector_not_found" } unless coords
31
31
 
32
32
  session.page.mouse.move(x: coords["x"], y: coords["y"])
33
33
  { ok: true }
@@ -43,7 +43,7 @@ module Browserctl
43
43
  return sel if sel.is_a?(Hash)
44
44
 
45
45
  el = session.page.at_css(sel)
46
- return { error: "selector not found: #{sel}" } unless el
46
+ return { error: "selector not found: #{sel}", code: "selector_not_found" } unless el
47
47
 
48
48
  el.select_file(path)
49
49
  { ok: true }
@@ -56,7 +56,7 @@ module Browserctl
56
56
  return sel if sel.is_a?(Hash)
57
57
 
58
58
  el = session.page.at_css(sel)
59
- return { error: "selector not found: #{sel}" } unless el
59
+ return { error: "selector not found: #{sel}", code: "selector_not_found" } unless el
60
60
 
61
61
  el.evaluate(
62
62
  "this.value = #{req[:value].to_json}; " \
@@ -33,7 +33,8 @@ module Browserctl
33
33
  sel = resolve_selector_from(session, req)
34
34
  return sel if sel.is_a?(Hash)
35
35
 
36
- type_into(session.page, sel, req[:value])
36
+ result = type_into(session.page, sel, req[:value])
37
+ enrich_with_recording_metadata(result, session, sel, req)
37
38
  end
38
39
  end
39
40
 
@@ -42,17 +43,45 @@ module Browserctl
42
43
  sel = resolve_selector_from(session, req)
43
44
  return sel if sel.is_a?(Hash)
44
45
 
45
- click_element(session.page, sel)
46
+ result = click_element(session.page, sel)
47
+ enrich_with_recording_metadata(result, session, sel, req)
46
48
  end
47
49
  end
48
50
 
51
+ # Adds ref / fingerprint / snapshot_id / postcondition_hint to a successful
52
+ # click/fill response. Recording uses these to build a self-healing log.
53
+ # When req[:capture_post_snapshot] is true, also takes a fresh snapshot
54
+ # and attaches its digest so workflow run --check can diff DOM state
55
+ # against the recorded baseline.
56
+ def enrich_with_recording_metadata(result, session, selector, req)
57
+ return result unless result[:ok]
58
+
59
+ ref = req[:ref] || session.ref_registry.invert[selector]
60
+ fp = (ref && session.fingerprint_index[ref]) || session.fingerprint_index[selector]
61
+ enriched = result.merge(
62
+ ref: ref,
63
+ fingerprint: fp,
64
+ snapshot_id: session.snapshot_id,
65
+ postcondition_hint: { url: session.page.current_url }
66
+ )
67
+ enriched[:post_snapshot_digest] = capture_post_snapshot_digest(session) if req[:capture_post_snapshot]
68
+ enriched.compact
69
+ end
70
+
71
+ def capture_post_snapshot_digest(session)
72
+ snapshot = @snapshot_builder.call(session.page)
73
+ Browserctl::Replay::SnapshotDiff.digest(snapshot)
74
+ rescue StandardError
75
+ nil
76
+ end
77
+
49
78
  def cmd_url(req)
50
79
  with_page(req[:name]) { |session| { ok: true, url: session.page.current_url } }
51
80
  end
52
81
 
53
82
  def type_into(page, selector, value)
54
83
  el = page.at_css(selector)
55
- return { error: "selector not found: #{selector}" } unless el
84
+ return { error: "selector not found: #{selector}", code: "selector_not_found" } unless el
56
85
 
57
86
  el.focus
58
87
  el.evaluate("this.select()")
@@ -62,7 +91,7 @@ module Browserctl
62
91
 
63
92
  def click_element(page, selector)
64
93
  el = page.at_css(selector)
65
- return { error: "selector not found: #{selector}" } unless el
94
+ return { error: "selector not found: #{selector}", code: "selector_not_found" } unless el
66
95
 
67
96
  # Use the DOM native click() so JS-only event listeners fire.
68
97
  # CDP mouse simulation (el.click) dispatches events at screen coordinates
@@ -12,6 +12,30 @@ module Browserctl
12
12
  with_page(req[:name]) { |session| take_snapshot(session, req[:format], req[:diff]) }
13
13
  end
14
14
 
15
+ # Runs the auth_required detector against the page and returns either a
16
+ # plain `{ ok: true, auth_required: false }` response or a structured
17
+ # `{ error:, code: "AUTH_REQUIRED", state:, suggested_flow:, reason: }`
18
+ # error. Callers feed in cookies / suggested_flow when they have a
19
+ # bundle in hand (see PR 18); without them, only the URL signal fires.
20
+ def cmd_auth_check(req)
21
+ with_page(req[:name]) do |session|
22
+ cookies = session.page.cookies.all.values.map(&:to_h) if req[:include_cookies]
23
+ result = Browserctl::Detectors.auth_required(
24
+ session.page,
25
+ cookies: cookies,
26
+ suggested_flow: req[:suggested_flow]
27
+ )
28
+ next { ok: true, auth_required: false } unless result.triggered
29
+
30
+ Browserctl::AuthRequiredError.new(
31
+ result.reason,
32
+ state: req[:state],
33
+ suggested_flow: result.suggested_flow,
34
+ reason: result.reason
35
+ ).to_response
36
+ end
37
+ end
38
+
15
39
  def take_snapshot(session, format, diff)
16
40
  nonce = SecureRandom.hex(8)
17
41
  challenge = Detectors.cloudflare?(session.page)
@@ -20,15 +44,32 @@ module Browserctl
20
44
 
21
45
  snapshot = @snapshot_builder.call(session.page)
22
46
  registry = snapshot.to_h { |el| [el[:ref], el[:selector]] }
47
+ fp_index = build_fingerprint_index(snapshot)
23
48
 
24
49
  prev = session.prev_snapshot
25
- session.ref_registry = registry
26
- session.prev_snapshot = snapshot
50
+ session.ref_registry = registry
51
+ session.fingerprint_index = fp_index
52
+ session.snapshot_id = nonce
53
+ session.prev_snapshot = snapshot
27
54
  result = diff && prev ? compute_diff(prev, snapshot) : snapshot
28
55
 
29
56
  { ok: true, snapshot: result, challenge: challenge, nonce: nonce }
30
57
  end
31
58
 
59
+ def build_fingerprint_index(snapshot)
60
+ index = {}
61
+ snapshot.each do |el|
62
+ ref = el[:ref]
63
+ sel = el[:selector]
64
+ fp = el[:fingerprint]
65
+ next unless fp
66
+
67
+ index[ref] = fp if ref
68
+ index[sel] = fp if sel
69
+ end
70
+ index
71
+ end
72
+
32
73
  def compute_diff(prev, current)
33
74
  prev_by_sel = prev.to_h { |el| [el[:selector], el] }
34
75
  current.reject do |el|
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require_relative "../../state"
5
+
6
+ module Browserctl
7
+ class CommandDispatcher
8
+ module Handlers
9
+ # Top-level state management — collapses cookies + localStorage +
10
+ # sessionStorage into a single `.bctl` bundle. See lib/browserctl/state.rb.
11
+ module State
12
+ private
13
+
14
+ def cmd_state_save(req)
15
+ first_session = @global_mutex.synchronize { @pages.values.first }
16
+ return { error: "no open pages — open a page before saving state" } unless first_session
17
+
18
+ payload, captured_origins = capture_state_payload
19
+ manifest = Browserctl::State.save(
20
+ req[:name],
21
+ payload: payload,
22
+ origins: req[:origins] || captured_origins,
23
+ flow: req[:flow],
24
+ flow_version: req[:flow_version],
25
+ passphrase: req[:passphrase]
26
+ )
27
+
28
+ {
29
+ ok: true,
30
+ path: Browserctl::State.path(req[:name]),
31
+ origins: manifest[:origins],
32
+ cookies: payload[:cookies].length,
33
+ encrypted: manifest[:encrypted]
34
+ }
35
+ rescue Browserctl::Error, ArgumentError => e
36
+ { error: e.message }
37
+ end
38
+
39
+ def cmd_state_load(req)
40
+ data = Browserctl::State.load(req[:name], passphrase: req[:passphrase])
41
+ target = @global_mutex.synchronize { @pages.values.first }
42
+ return { error: "no open pages — open a page before loading state" } unless target
43
+
44
+ cookies = pluck(data[:payload], :cookies, default: [])
45
+
46
+ unless req[:skip_auth_check]
47
+ auth = Browserctl::Detectors.auth_required(
48
+ target.page, cookies: cookies, suggested_flow: data[:manifest][:flow]
49
+ )
50
+ if auth.triggered
51
+ return Browserctl::AuthRequiredError.new(
52
+ auth.reason,
53
+ state: req[:name],
54
+ suggested_flow: auth.suggested_flow,
55
+ reason: auth.reason
56
+ ).to_response
57
+ end
58
+ end
59
+
60
+ restore_state_cookies(target, cookies)
61
+ ls_count = restore_local_storage(pluck(data[:payload], :local_storage, default: {}))
62
+
63
+ {
64
+ ok: true,
65
+ cookies: cookies.length,
66
+ local_storage_keys: ls_count,
67
+ origins: data[:manifest][:origins]
68
+ }
69
+ rescue Browserctl::State::Bundle::BundleError, Browserctl::Error, ArgumentError, JSON::ParserError => e
70
+ { error: e.message }
71
+ end
72
+
73
+ def pluck(hash, sym, default:)
74
+ hash[sym] || hash[sym.to_s] || default
75
+ end
76
+
77
+ def restore_state_cookies(target, cookies)
78
+ cookies.each do |raw|
79
+ c = raw.transform_keys(&:to_sym)
80
+ target.page.cookies.set(**c.slice(:name, :value, :domain, :path))
81
+ end
82
+ end
83
+
84
+ def cmd_state_list(_req)
85
+ { ok: true, state: Browserctl::State.all }
86
+ end
87
+
88
+ def cmd_state_info(req)
89
+ { ok: true, info: Browserctl::State.info(req[:name]) }
90
+ rescue Browserctl::State::Bundle::BundleError, Browserctl::Error, ArgumentError => e
91
+ { error: e.message }
92
+ end
93
+
94
+ def cmd_state_delete(req)
95
+ Browserctl::State.delete(req[:name])
96
+ { ok: true }
97
+ rescue ArgumentError => e
98
+ { error: e.message }
99
+ end
100
+
101
+ def capture_state_payload
102
+ first = @global_mutex.synchronize { @pages.values.first }
103
+ cookies = first.page.cookies.all.values.map(&:to_h)
104
+
105
+ local_storage = {}
106
+ session_storage = {}
107
+ captured_origins = []
108
+
109
+ @global_mutex.synchronize { @pages.dup }.each_value do |session|
110
+ session.mutex.synchronize do
111
+ origin = session.page.evaluate("location.origin")
112
+ ls_str = session.page.evaluate("JSON.stringify({...localStorage})") || "{}"
113
+ ss_str = session.page.evaluate("JSON.stringify({...sessionStorage})") || "{}"
114
+ local_storage[origin] = JSON.parse(ls_str)
115
+ session_storage[origin] = JSON.parse(ss_str)
116
+ captured_origins << origin
117
+ end
118
+ end
119
+
120
+ payload = {
121
+ cookies: cookies,
122
+ local_storage: local_storage,
123
+ session_storage: session_storage
124
+ }
125
+ [payload, captured_origins.uniq]
126
+ end
127
+
128
+ def restore_local_storage(local_storage)
129
+ count = 0
130
+ local_storage.each do |origin, keys|
131
+ next if keys.nil? || keys.empty?
132
+
133
+ tmp_page = @driver.create_page
134
+ begin
135
+ tmp_page.go_to(origin.to_s)
136
+ keys.each do |k, v|
137
+ tmp_page.evaluate("localStorage.setItem(#{k.to_json}, #{v.to_json})")
138
+ count += 1
139
+ end
140
+ ensure
141
+ tmp_page.close
142
+ end
143
+ end
144
+ count
145
+ end
146
+ end
147
+ end
148
+ end
149
+ end
@@ -3,15 +3,17 @@
3
3
  module Browserctl
4
4
  class PageSession
5
5
  attr_reader :page, :mutex, :pause_cv
6
- attr_accessor :ref_registry, :prev_snapshot
6
+ attr_accessor :ref_registry, :prev_snapshot, :fingerprint_index, :snapshot_id
7
7
 
8
8
  def initialize(page)
9
- @page = page
10
- @mutex = Mutex.new
11
- @pause_cv = ConditionVariable.new
12
- @ref_registry = {}
13
- @prev_snapshot = nil
14
- @paused = false
9
+ @page = page
10
+ @mutex = Mutex.new
11
+ @pause_cv = ConditionVariable.new
12
+ @ref_registry = {}
13
+ @fingerprint_index = {}
14
+ @snapshot_id = nil
15
+ @prev_snapshot = nil
16
+ @paused = false
15
17
  end
16
18
 
17
19
  def paused? = @paused
@@ -1,55 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "nokogiri"
3
+ require "browserctl/snapshot/extractor"
4
+ require "browserctl/snapshot/annotator"
5
+ require "browserctl/snapshot/serializer"
4
6
 
5
7
  module Browserctl
8
+ # Orchestrates the snapshot pipeline:
9
+ #
10
+ # page.body ──Extractor──▶ [nodes]
11
+ # ──Annotator──▶ [entries with ref + fingerprint]
12
+ # ──Serializer─▶ wire-shape array
13
+ #
14
+ # Each stage is independently testable. Inject alternates via the keyword
15
+ # args for tests that want to isolate one stage.
6
16
  class SnapshotBuilder
7
- INTERACTABLE = %w[a button input select textarea
8
- [role=button] [role=link] [role=menuitem]].freeze
9
- ATTRS = %w[type name placeholder href aria-label role].freeze
10
-
11
- def call(page)
12
- doc = Nokogiri::HTML(page.body)
13
- ref = 0
14
- doc.css(INTERACTABLE.join(",")).map { |el| element_entry(el, ref += 1) }
15
- end
16
-
17
- private
18
-
19
- def element_entry(elem, ref)
20
- { ref: "e#{ref}", tag: elem.name, text: elem.text.strip.slice(0, 80),
21
- selector: css_path(elem), attrs: element_attrs(elem) }
22
- end
23
-
24
- def element_attrs(elem)
25
- elem.attributes.transform_values(&:value).slice(*ATTRS)
26
- end
27
-
28
- def css_path(node)
29
- ancestors_until_html(node).map { |n| path_segment(n) }.join(" > ")
17
+ def initialize(extractor: Snapshot::Extractor.new,
18
+ annotator: Snapshot::Annotator.new,
19
+ serializer: Snapshot::Serializer.new)
20
+ @extractor = extractor
21
+ @annotator = annotator
22
+ @serializer = serializer
30
23
  end
31
24
 
32
- def ancestors_until_html(node)
33
- [].tap do |acc|
34
- while node && node.name != "html"
35
- acc.unshift(node)
36
- node = node.parent
37
- end
38
- end
39
- end
40
-
41
- def path_segment(node)
42
- node.name + id_fragment(node) + class_fragment(node)
43
- end
44
-
45
- def id_fragment(node)
46
- (id = node["id"]) && !id.empty? ? "##{id}" : ""
47
- end
48
-
49
- def class_fragment(node)
50
- return "" if node["id"] && !node["id"].empty?
51
-
52
- (klass = node["class"]&.split&.first) ? ".#{klass}" : ""
25
+ def call(page)
26
+ nodes = @extractor.call(page.body)
27
+ entries = @annotator.call(nodes)
28
+ @serializer.call(entries)
53
29
  end
54
30
  end
55
31
  end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "browserctl/snapshot/ref"
4
+ require "browserctl/snapshot/fingerprint"
5
+
6
module Browserctl
  module Snapshot
    # Stage 2 of the snapshot pipeline.
    #
    # Takes the list of interactable nodes from Extractor and produces
    # element entries with stable refs, semantic metadata, a CSS selector
    # path, and a fingerprint. Each entry is a plain Hash.
    class Annotator
      ATTRS = %w[type name placeholder href aria-label role].freeze

      # @param ref_deriver [#derive, #disambiguate] produces a ref per node
      # @param fingerprint [#build] produces a fingerprint per node
      def initialize(ref_deriver: RefDeriver.new, fingerprint: Fingerprint.new)
        @ref_deriver = ref_deriver
        @fingerprint = fingerprint
      end

      # Builds one entry per node, in input order.
      # Refs already handed out are tracked so the deriver can disambiguate
      # later nodes that derive the same ref.
      # @param nodes [Array] nodes as returned by the extractor stage
      # @return [Array<Hash>] entries with :ref, :tag, :text, :selector,
      #   :attrs and :fingerprint
      def call(nodes)
        taken = {}
        nodes.map do |node|
          ref = @ref_deriver.disambiguate(@ref_deriver.derive(node), taken)
          taken[ref] = true
          entry(node, ref)
        end
      end

      private

      # Text is stripped and capped at 80 characters.
      def entry(node, ref)
        {
          ref: ref,
          tag: node.name,
          text: node.text.strip.slice(0, 80),
          selector: css_path(node),
          attrs: attrs(node),
          fingerprint: @fingerprint.build(node)
        }
      end

      # Only the attributes listed in ATTRS are kept.
      def attrs(node)
        node.attributes.transform_values(&:value).slice(*ATTRS)
      end

      # Child-combinator path from the outermost ancestor below <html>
      # down to the node itself.
      def css_path(node)
        ancestors_until_html(node).map { |n| segment(n) }.join(" > ")
      end

      def ancestors_until_html(node)
        [].tap do |acc|
          while node && node.name != "html"
            acc.unshift(node)
            node = node.parent
          end
        end
      end

      def segment(node)
        node.name + id_fragment(node) + class_fragment(node)
      end

      # `#id` when the node has a non-empty id, escaped so ids such as
      # ":r1:" still form a valid selector.
      def id_fragment(node)
        (id = node["id"]) && !id.empty? ? "##{css_escape(id)}" : ""
      end

      # `.first-class` for nodes without an id; the class name is escaped so
      # names such as "md:flex" or "w-1/2" still form a valid selector.
      def class_fragment(node)
        return "" if node["id"] && !node["id"].empty?

        (klass = node["class"]&.split&.first) ? ".#{css_escape(klass)}" : ""
      end

      # Escapes a string for use as a CSS identifier, following the rules of
      # the CSSOM `CSS.escape` serialization. Identifiers made only of ASCII
      # letters, digits (not leading), "-" and "_" come back unchanged.
      def css_escape(ident)
        ident.each_char.with_index.map do |ch, i|
          code = ch.ord
          if code.zero?
            "\uFFFD"
          elsif code < 0x20 || code == 0x7F
            format("\\%x ", code)
          elsif ch.match?(/\A[0-9]\z/) && (i.zero? || (i == 1 && ident[0] == "-"))
            # A leading digit (or a digit right after a leading "-") must be
            # written as a code point escape.
            format("\\%x ", code)
          elsif ch == "-" && ident.length == 1
            "\\-"
          elsif code >= 0x80 || ch.match?(/\A[A-Za-z0-9_-]\z/)
            ch
          else
            "\\#{ch}"
          end
        end.join
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+
5
module Browserctl
  module Snapshot
    # Stage 1 of the snapshot pipeline.
    #
    # Turns raw HTML into the list of interactable Nokogiri nodes that later
    # stages annotate. Refs, fingerprints and the wire format are not this
    # stage's concern.
    class Extractor
      INTERACTABLE = %w[a button input select textarea
                        [role=button] [role=link] [role=menuitem]].freeze

      # @param html [String] raw page markup
      # @return [Array] every node matching one of the INTERACTABLE selectors
      def call(html)
        document = Nokogiri::HTML(html)
        selector = INTERACTABLE.join(",")
        document.css(selector).to_a
      end
    end
  end
end