browserbeam 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 968540abd1c037b539260f27cf439d8f49d05a25f16e398cd56f56dc6d2c4591
4
- data.tar.gz: fb7920bc04046dd96d62277826d2e3b4a83fce58d96961b65643686326c510d3
3
+ metadata.gz: 1f56eec67915879c1fb60e4c81cd34b4bd93d52cda25c5da6f72ad3def1f6a74
4
+ data.tar.gz: 58fbf4bab5ba5f29414cc4368310b2c120b2224a5455ea1946b8a86773aae7b3
5
5
  SHA512:
6
- metadata.gz: bfe0019f622ff4273922faa8bc386264e013c33706f4ae3fe4921c68e04a42e9b7340b61874801df22d14efbc98529de2b5ef6937469e857f78377461a8a8f66
7
- data.tar.gz: f4708420e29384d4a70d4ec10fe21d1d0b43a9cd83a9973c35d90991337e4175e1cd693da190cf0271d2e65b132958c1425ef1bd0b811171c18f12fd82aeee7e
6
+ metadata.gz: 7654e9332dd6745df0fff89d113d8de9b6120b8d632e09eb9e114de6120569fae5e0bc2a96e721881c3b099bb33e250d24562e3fcf8576db3aaa4ac84bb0acaf
7
+ data.tar.gz: 1a3bdeb98537414f79d9d8cc5d1e00e488d834e8cbe66e0080116afb00ca3bfc74904c8b028fc6965dbc3422967e7554e7fee1bfd7f2e6194864ad82f23393f9
data/README.md CHANGED
@@ -30,10 +30,17 @@ puts session.page.interactive_elements
30
30
  # Interact with the page
31
31
  session.click(ref: "e1")
32
32
 
33
- # Extract structured data
33
+ # Extract with CSS, AI, and JS selectors combined
34
34
  result = session.extract(
35
- title: "h1 >> text",
36
- links: ["a >> href"]
35
+ products: [{
36
+ "_parent": ".product-card",
37
+ "_limit": 3,
38
+ "name": "h2 >> text", # CSS selector
39
+ "price": ".price >> text", # CSS selector
40
+ "url": "a >> href", # CSS attribute
41
+ "rating": "ai >> the star rating out of 5", # AI selector
42
+ "in_stock": "js >> el.querySelector('.stock')?.textContent.includes('In stock')", # JS
43
+ }]
37
44
  )
38
45
  puts result.extraction
39
46
 
@@ -57,21 +64,39 @@ client = Browserbeam::Client.new(
57
64
  session = client.sessions.create(
58
65
  url: "https://example.com",
59
66
  viewport: { width: 1280, height: 720 },
67
+ user_agent: "Mozilla/5.0 ...", # omit for automatic rotation
60
68
  locale: "en-US",
61
69
  timezone: "America/New_York",
62
- proxy: "http://user:pass@proxy:8080",
63
70
  block_resources: ["image", "font"],
64
71
  auto_dismiss_blockers: true,
65
72
  timeout: 300,
66
73
  )
67
74
  ```
68
75
 
76
+ ### Proxies
77
+
78
+ All sessions use a datacenter proxy by default (country auto-detected from the URL's TLD). No configuration needed. To customize:
79
+
80
+ ```ruby
81
+ # Use a residential proxy for a specific country
82
+ session = client.sessions.create(
83
+ url: "https://example.com",
84
+ proxy: { kind: "residential", country: "us" },
85
+ )
86
+
87
+ # Or bring your own proxy (overrides managed proxy)
88
+ session = client.sessions.create(
89
+ url: "https://example.com",
90
+ proxy: "http://user:pass@proxy:8080",
91
+ )
92
+ ```
93
+
69
94
  ## Available Methods
70
95
 
71
96
  | Method | Description |
72
97
  |--------|-------------|
73
98
  | `session.goto(url)` | Navigate to a URL |
74
- | `session.observe` | Get page state as markdown |
99
+ | `session.observe` | Get page state as markdown. Supports `mode: "full"` for all sections. |
75
100
  | `session.click(ref:)` | Click an element by ref, text, or label |
76
101
  | `session.fill(value, ref:)` | Fill an input field |
77
102
  | `session.type(value, label:)` | Type text character by character |
@@ -87,6 +112,37 @@ session = client.sessions.create(
87
112
  | `session.execute_js(code)` | Run JavaScript |
88
113
  | `session.close` | Close the session |
89
114
 
115
+ ## Page Map & Full Mode
116
+
117
+ The first `observe` call automatically includes a `page.map` — a lightweight structural outline of the page's landmark regions (header, nav, main, aside, footer) with CSS selectors and descriptive hints. Use it to discover what content is available outside the main area.
118
+
119
+ ```ruby
120
+ res = session.observe
121
+ res.page.map.each { |entry| puts "#{entry.section}: #{entry.hint}" }
122
+ # nav: Home · Docs · Pricing
123
+ # main: Getting started with Browserbeam...
124
+ # aside: Related posts · Popular tags
125
+ ```
126
+
127
+ To re-request the map on subsequent calls:
128
+
129
+ ```ruby
130
+ session.observe(include_page_map: true)
131
+ ```
132
+
133
+ When you need content from **all** page sections (sidebars, footer links, nav items), use `mode: "full"`. The response markdown is organized by region headers:
134
+
135
+ ```ruby
136
+ full = session.observe(mode: "full", max_text_length: 20_000)
137
+ puts full.page.markdown.content
138
+ # ## [nav]
139
+ # Home · Docs · Pricing
140
+ # ## [main]
141
+ # ...article content...
142
+ # ## [aside]
143
+ # Related posts · ...
144
+ ```
145
+
90
146
  ## Session Management
91
147
 
92
148
  ```ruby
@@ -27,10 +27,12 @@ module Browserbeam
27
27
  act([{ goto: params }])
28
28
  end
29
29
 
30
- def observe(scope: nil, format: nil, include_links: nil, max_text_length: nil)
30
+ def observe(scope: nil, format: nil, mode: nil, include_page_map: nil, include_links: nil, max_text_length: nil)
31
31
  params = {}
32
32
  params[:scope] = scope if scope
33
33
  params[:format] = format if format
34
+ params[:mode] = mode if mode
35
+ params[:include_page_map] = include_page_map unless include_page_map.nil?
34
36
  params[:include_links] = include_links unless include_links.nil?
35
37
  params[:max_text_length] = max_text_length if max_text_length
36
38
  act([{ observe: params }])
@@ -18,6 +18,12 @@ module Browserbeam
18
18
  end
19
19
  end
20
20
 
21
+ MapEntry = Struct.new(:section, :selector, :hint, keyword_init: true) do
22
+ def self.from_hash(data)
23
+ new(section: data["section"] || "", selector: data["selector"] || "", hint: data["hint"] || "")
24
+ end
25
+ end
26
+
21
27
  Changes = Struct.new(:content_changed, :content_delta, :elements_added, :elements_removed, keyword_init: true) do
22
28
  def self.from_hash(data)
23
29
  return nil unless data.is_a?(Hash)
@@ -37,7 +43,7 @@ module Browserbeam
37
43
  end
38
44
  end
39
45
 
40
- PageState = Struct.new(:url, :title, :stable, :markdown, :interactive_elements, :forms, :changes, :scroll, keyword_init: true) do
46
+ PageState = Struct.new(:url, :title, :stable, :markdown, :map, :interactive_elements, :forms, :changes, :scroll, keyword_init: true) do
41
47
  def self.from_hash(data)
42
48
  return nil unless data.is_a?(Hash)
43
49
  new(
@@ -45,6 +51,7 @@ module Browserbeam
45
51
  title: data["title"] || "",
46
52
  stable: data["stable"] || false,
47
53
  markdown: MarkdownContent.from_hash(data["markdown"]),
54
+ map: data["map"] ? data["map"].map { |m| MapEntry.from_hash(m) } : nil,
48
55
  interactive_elements: (data["interactive_elements"] || []).map { |el| InteractiveElement.from_hash(el) },
49
56
  forms: data["forms"] || [],
50
57
  changes: Changes.from_hash(data["changes"]),
@@ -1,3 +1,3 @@
1
1
  module Browserbeam
2
- VERSION = "0.2.0"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: browserbeam
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Browserbeam
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-03-25 00:00:00.000000000 Z
11
+ date: 2026-04-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday