brightdata 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +38 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +149 -0
  5. data/lib/brightdata/client.rb +25 -0
  6. data/lib/brightdata/datasets.rb +31 -0
  7. data/lib/brightdata/errors.rb +101 -0
  8. data/lib/brightdata/http.rb +195 -0
  9. data/lib/brightdata/linkedin/companies.rb +32 -0
  10. data/lib/brightdata/linkedin/endpoint.rb +195 -0
  11. data/lib/brightdata/linkedin/jobs.rb +83 -0
  12. data/lib/brightdata/linkedin/namespace.rb +32 -0
  13. data/lib/brightdata/linkedin/people.rb +39 -0
  14. data/lib/brightdata/linkedin/posts.rb +97 -0
  15. data/lib/brightdata/linkedin/profiles.rb +32 -0
  16. data/lib/brightdata/linkedin/types/company.rb +92 -0
  17. data/lib/brightdata/linkedin/types/company_url_input.rb +13 -0
  18. data/lib/brightdata/linkedin/types/discovered_profile.rb +45 -0
  19. data/lib/brightdata/linkedin/types/job.rb +54 -0
  20. data/lib/brightdata/linkedin/types/job_keyword_input.rb +44 -0
  21. data/lib/brightdata/linkedin/types/job_url_input.rb +13 -0
  22. data/lib/brightdata/linkedin/types/people_discover_input.rb +24 -0
  23. data/lib/brightdata/linkedin/types/post.rb +67 -0
  24. data/lib/brightdata/linkedin/types/post_company_url_input.rb +13 -0
  25. data/lib/brightdata/linkedin/types/post_profile_url_input.rb +13 -0
  26. data/lib/brightdata/linkedin/types/post_url_input.rb +13 -0
  27. data/lib/brightdata/linkedin/types/profile.rb +81 -0
  28. data/lib/brightdata/linkedin/types/profile_url_input.rb +14 -0
  29. data/lib/brightdata/live_trace.rb +124 -0
  30. data/lib/brightdata/result.rb +27 -0
  31. data/lib/brightdata/snapshot.rb +122 -0
  32. data/lib/brightdata/version.rb +6 -0
  33. data/lib/brightdata.rb +60 -0
  34. data/llm.md +109 -0
  35. metadata +193 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 22a0b2f8657a178d69eb1c6c3220db2d086425af48c8388b68b2633d6d9ff194
4
+ data.tar.gz: cd967fbe25a97ff0eb0cad7b1319a830c98684d406bac3add844dc075b3a1ab2
5
+ SHA512:
6
+ metadata.gz: bec94e0b1b6d38365cd5f0d77579159ef0b3bbe359378c2f4f76a0bf984b52be62f010c608421b59a39d63cb17ce5a004cd7e440b2e87e5797c5a7d0913f94c1
7
+ data.tar.gz: 204b8987e39b9284fb956373a75f349b8ddcaccb62a6a917ef3a99de4942135486354bb8f90ec247c9ea2374632c408597a50b78bba8d0522f6d5eab493c55d2
data/CHANGELOG.md ADDED
@@ -0,0 +1,38 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
5
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [0.1.0] - 2026-05-28
10
+
11
+ Initial public release.
12
+
13
+ ### Added
14
+ - `BrightData::Client` with configurable `api_token`, `base_url`, and `logger`.
15
+ - LinkedIn endpoints under `client.linkedin`:
16
+ - `profiles` and `companies` (collect by URL)
17
+ - `jobs.collect_by_url`, `jobs.discover_by_url`, `jobs.discover_by_keyword`
18
+ - `posts.collect_by_url`, `posts.discover_by_url`,
19
+ `posts.discover_by_profile_url`, `posts.discover_by_company_url`
20
+ - `people.discover_new_profiles`
21
+ - Every endpoint exposes both `#scrape` (synchronous, parsed results) and
22
+ `#trigger` (asynchronous, returns a `Snapshot`).
23
+ - `BrightData::Snapshot#wait` polling with configurable `timeout` and
24
+ `poll_interval`, returning a `BrightData::Result` (`SimpleResult::Success` or
25
+ `Failure`).
26
+ - Typed `Data`-backed result objects (`Profile`, `Company`, `Job`, `Post`,
27
+ `DiscoveredProfile`) and input objects (`JobKeywordInput`,
28
+ `PeopleDiscoverInput`, and `*UrlInput` variants), each exposing typed readers
29
+ plus `#raw`.
30
+ - Error hierarchy rooted at `BrightData::Error`: `ConfigurationError`,
31
+ `ArgumentError`, `AuthError`, `HTTPError`, `RateLimitError` (`#retry_after`),
32
+ `ServerError`, `ScrapeTimeoutError` (`#snapshot`).
33
+ - Mutable `BrightData::Datasets::LINKEDIN` registry for overriding or adding
34
+ dataset IDs at runtime.
35
+ - Optional request/response tracing via `BRIGHTDATA_LIVE`, recorded under
36
+ `tmp/live/`.
37
+ - `llm.md` — single-file, LLM-friendly reference generated from YARD docs via
38
+ `bin/generate_llm.rb` and `bin/prepare_release`.
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Lucian Ghinda
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,149 @@
1
+ # brightdata
2
+
3
+ A typed, ergonomic Ruby client for [Bright Data](https://brightdata.com)'s
4
+ Datasets v3 scraper APIs. Version 0.1.0 ships the LinkedIn endpoints.
5
+
6
+ ## Installation
7
+
8
+ Add it to your Gemfile:
9
+
10
+ ```ruby
11
+ gem "brightdata"
12
+ ```
13
+
14
+ Then run `bundle install`, or install it directly:
15
+
16
+ ```sh
17
+ gem install brightdata
18
+ ```
19
+
20
+ Requires Ruby 3.4.4 or newer.
21
+
22
+ ## Configuration
23
+
24
+ Create a client with your Bright Data API token:
25
+
26
+ ```ruby
27
+ client = BrightData::Client.new(api_token: ENV.fetch("BRIGHTDATA_API_TOKEN"))
28
+ ```
29
+
30
+ Optional keyword arguments:
31
+
32
+ - `base_url:` — override the API host (defaults to `https://api.brightdata.com`).
33
+ - `logger:` — a `Logger` that receives one `debug` line per request.
34
+
35
+ ## Synchronous vs. asynchronous
36
+
37
+ Every endpoint exposes two methods:
38
+
39
+ - `#scrape(...)` runs synchronously and returns parsed, typed results. Bright
40
+ Data caps synchronous scrapes at 60 seconds; if a job exceeds that,
41
+ `scrape` raises `BrightData::ScrapeTimeoutError`, which carries a resumable
42
+ snapshot (`error.snapshot`).
43
+ - `#trigger(...)` starts an asynchronous collection and returns a
44
+ `BrightData::Snapshot` you poll with `#wait`.
45
+
46
+ ```ruby
47
+ # Synchronous
48
+ profiles = client.linkedin.profiles.scrape(
49
+ urls: ["https://www.linkedin.com/in/example/"]
50
+ )
51
+ profiles.first.name # => "Example Person"
52
+
53
+ # Asynchronous
54
+ snapshot = client.linkedin.profiles.trigger(
55
+ urls: ["https://www.linkedin.com/in/example/"]
56
+ )
57
+ result = snapshot.wait # blocks, polling progress until ready/failed/timeout
58
+ if result.success?
59
+ result.payload # => Array<BrightData::LinkedIn::Types::Profile>
60
+ else
61
+ result.error # => raw failure payload from Bright Data
62
+ end
63
+ ```
64
+
65
+ `Snapshot#wait` accepts `timeout:` (default 300s) and `poll_interval:`
66
+ (default 5s), and raises `BrightData::ScrapeTimeoutError` if the deadline
67
+ passes before the snapshot reaches a terminal state.
68
+
69
+ ## LinkedIn endpoints
70
+
71
+ | Call | Argument | Returns |
72
+ | --- | --- | --- |
73
+ | `linkedin.profiles` | `urls:` | `Types::Profile` |
74
+ | `linkedin.companies` | `urls:` | `Types::Company` |
75
+ | `linkedin.jobs.collect_by_url` | `urls:` | `Types::Job` |
76
+ | `linkedin.jobs.discover_by_url` | `urls:` | `Types::Job` |
77
+ | `linkedin.jobs.discover_by_keyword` | `queries:` (`Types::JobKeywordInput`) | `Types::Job` |
78
+ | `linkedin.posts.collect_by_url` | `urls:` | `Types::Post` |
79
+ | `linkedin.posts.discover_by_url` | `urls:` | `Types::Post` |
80
+ | `linkedin.posts.discover_by_profile_url` | `profile_urls:` | `Types::Post` |
81
+ | `linkedin.posts.discover_by_company_url` | `company_urls:` | `Types::Post` |
82
+ | `linkedin.people.discover_new_profiles` | `queries:` (`Types::PeopleDiscoverInput`) | `Types::DiscoveredProfile` |
83
+
84
+ ### Discovery by keyword
85
+
86
+ ```ruby
87
+ query = BrightData::LinkedIn::Types::JobKeywordInput.new(
88
+ location: "New York",
89
+ keyword: "ruby",
90
+ country: nil, time_range: nil, job_type: nil, experience_level: nil,
91
+ remote: nil, company: nil, selective_search: nil,
92
+ jobs_to_not_include: nil, location_radius: nil
93
+ )
94
+
95
+ jobs = client.linkedin.jobs.discover_by_keyword.scrape(queries: [query])
96
+ ```
97
+
98
+ `nil` fields are omitted from the request payload.
99
+
100
+ ## Result types
101
+
102
+ Results are immutable `Data` value objects (`Types::Profile`, `Types::Company`,
103
+ `Types::Job`, `Types::Post`, `Types::DiscoveredProfile`). Each exposes typed
104
+ readers for the common fields plus `#raw`, the full parsed response hash, so you
105
+ can reach fields the gem does not yet type:
106
+
107
+ ```ruby
108
+ profile = profiles.first
109
+ profile.name # typed reader
110
+ profile.raw[:posts] # anything not yet typed
111
+ ```
112
+
113
+ ## Error handling
114
+
115
+ All errors inherit from `BrightData::Error`:
116
+
117
+ - `BrightData::ConfigurationError` — blank API token.
118
+ - `BrightData::ArgumentError` — bad argument shape (note: not Ruby's
119
+ `::ArgumentError`).
120
+ - `BrightData::AuthError` — 401/403 from the API.
121
+ - `BrightData::RateLimitError` — 429; exposes `#retry_after`.
122
+ - `BrightData::ServerError` — 5xx.
123
+ - `BrightData::HTTPError` — other transport failures and timeouts.
124
+ - `BrightData::ScrapeTimeoutError` — synchronous scrape exceeded the 60s cap, or
125
+ `Snapshot#wait` passed its `timeout:`; recover via `error.snapshot.wait`.
126
+
127
+ ```ruby
128
+ begin
129
+ client.linkedin.profiles.scrape(urls: urls)
130
+ rescue BrightData::ScrapeTimeoutError => e
131
+ e.snapshot.wait # fall back to async polling
132
+ rescue BrightData::RateLimitError => e
133
+ sleep(e.retry_after || 5)
134
+ retry
135
+ rescue BrightData::Error => e
136
+ warn "Bright Data request failed: #{e.message}"
137
+ end
138
+ ```
139
+
140
+ ## Documentation for AI agents
141
+
142
+ [`llm.md`](llm.md) is a single-file, LLM-friendly reference generated from the
143
+ gem's YARD documentation. Point a coding assistant at it for the full API
144
+ surface and usage examples. Regenerate it with `bin/prepare_release` (or
145
+ `bundle exec yardoc --format=markdown && bin/generate_llm.rb`).
146
+
147
+ ## License
148
+
149
+ Released under the [MIT License](LICENSE.txt).
data/lib/brightdata/client.rb ADDED
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BrightData
4
+ # Top-level Bright Data API client.
5
+ #
6
+ # @example
7
+ # client = BrightData::Client.new(api_token: ENV.fetch("BRIGHTDATA_API_TOKEN"))
8
+ # client.linkedin
9
+ class Client
10
+ # @return [BrightData::HTTP] underlying HTTP wrapper
11
+ attr_reader :http
12
+
13
+ # @return [BrightData::LinkedIn::Namespace] LinkedIn endpoint namespace
14
+ attr_reader :linkedin
15
+
16
+ # @param api_token [String] Bright Data API token
17
+ # @param base_url [String] override Bright Data API base URL
18
+ # @param logger [Logger, nil] optional logger for request tracing
19
+ # @raise [BrightData::ConfigurationError] if `api_token` is nil or empty
20
+ def initialize(api_token:, base_url: BrightData::HTTP::BASE_URL, logger: nil)
21
+ @http = HTTP.new(api_token:, base_url:, logger:) # HTTP.new validates api_token and raises ConfigurationError
22
+ @linkedin = LinkedIn::Namespace.new(http: @http) # the namespace shares this client's HTTP wrapper
23
+ end
24
+ end
25
+ end
data/lib/brightdata/datasets.rb ADDED
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BrightData
4
+ # Registry of Bright Data dataset IDs keyed by symbolic endpoint name.
5
+ module Datasets
6
+ # @return [Hash{Symbol=>String}] LinkedIn dataset IDs
7
+ LINKEDIN = { # rubocop:disable Style/MutableConstant -- intentionally mutable so callers can register or override dataset IDs
8
+ profiles_collect_by_url: "gd_l1viktl72bvl7bjuj0",
9
+ companies_collect_by_url: "gd_l1vikfnt1wgvvqz95w",
10
+ jobs_collect_by_url: "gd_lpfll7v5hcqtkxl6l",
11
+ jobs_discover_by_url: "gd_lpfll7v5hcqtkxl6l",
12
+ jobs_discover_by_keyword: "gd_lpfll7v5hcqtkxl6l",
13
+ posts_collect_by_url: "gd_lyy3tktm25m4avu764",
14
+ posts_discover_by_profile_url: "gd_lyy3tktm25m4avu764",
15
+ posts_discover_by_url: "gd_lyy3tktm25m4avu764",
16
+ posts_discover_by_company_url: "gd_lyy3tktm25m4avu764",
17
+ people_discover_new_profiles: "gd_m8d03he47z8nwb5xc"
18
+ }
19
+
20
+ # Fetch a LinkedIn dataset ID.
21
+ #
22
+ # @param key [Symbol] symbolic endpoint name
23
+ # @return [String] dataset ID
24
+ # @raise [BrightData::ArgumentError] if key is unknown
25
+ def self.id_for(key)
26
+ LINKEDIN.fetch(key) do # block runs only for a missing key, replacing the default KeyError
27
+ raise ::BrightData::ArgumentError, "Unknown LinkedIn dataset key: #{key.inspect}"
28
+ end
29
+ end
30
+ end
31
+ end
data/lib/brightdata/errors.rb ADDED
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BrightData
4
+ # Base error class. Rescue this to catch any BrightData failure.
5
+ class Error < StandardError; end
6
+
7
+ # Raised when the gem is misconfigured, for example when `api_token` is blank.
8
+ class ConfigurationError < Error; end
9
+
10
+ # Raised when a caller passes an invalid argument shape or value.
11
+ #
12
+ # This class intentionally inherits from {BrightData::Error}, not from
13
+ # `::ArgumentError`. Use `rescue BrightData::ArgumentError` or
14
+ # `rescue BrightData::Error`.
15
+ class ArgumentError < Error; end
16
+
17
+ # Raised on 401/403 responses from the Bright Data API.
18
+ class AuthError < Error; end
19
+
20
+ # Base class for transport-level errors.
21
+ class HTTPError < Error
22
+ # @return [Integer, nil] HTTP status, or nil if the request never completed
23
+ attr_reader :status
24
+
25
+ # @return [String, nil] raw response body
26
+ attr_reader :body
27
+
28
+ # @return [Net::HTTPResponse, nil] raw Net::HTTP response
29
+ attr_reader :response
30
+
31
+ # @param message [String] human-readable error message
32
+ # @param status [Integer, nil] HTTP status, or nil if unavailable
33
+ # @param body [String, nil] raw response body
34
+ # @param response [Net::HTTPResponse, nil] raw Net::HTTP response
35
+ def initialize(message, status: nil, body: nil, response: nil)
36
+ super(message)
37
+ @status = status
38
+ @body = body
39
+ @response = response
40
+ end
41
+ end
42
+
43
+ # Raised on 429 Too Many Requests responses.
44
+ class RateLimitError < HTTPError
45
+ # @return [Integer, nil] value of the `Retry-After` header in seconds, or nil if absent
46
+ attr_reader :retry_after
47
+
48
+ # @param message [String] human-readable error message
49
+ # @param status [Integer] HTTP status
50
+ # @param body [String, nil] raw response body
51
+ # @param response [Net::HTTPResponse, nil] raw Net::HTTP response
52
+ # @param retry_after [Integer, nil] retry delay in seconds
53
+ def initialize(message, status: 429, body: nil, response: nil, retry_after: nil)
54
+ super(message, status:, body:, response:)
55
+ @retry_after = retry_after
56
+ end
57
+ end
58
+
59
+ # Raised on 5xx responses from the Bright Data API.
60
+ class ServerError < HTTPError; end
61
+
62
+ # Raised when `/scrape` exceeds Bright Data's 60-second synchronous cap; per the README, `Snapshot#wait` also raises it when its deadline passes.
63
+ class ScrapeTimeoutError < Error
64
+ # @return [String] snapshot ID returned by Bright Data
65
+ attr_reader :snapshot_id
66
+
67
+ # @return [BrightData::Snapshot, nil] resumable snapshot, or nil when none was attached
68
+ attr_reader :snapshot
69
+
70
+ # @param message [String] human-readable error message
71
+ # @param snapshot_id [String] snapshot ID returned by Bright Data
72
+ # @param snapshot [BrightData::Snapshot, nil] snapshot object that can be waited on
73
+ def initialize(message, snapshot_id:, snapshot: nil)
74
+ super(message)
75
+ @snapshot_id = snapshot_id
76
+ @snapshot = snapshot
77
+ end
78
+ end
79
+
80
+ # Raised by explicit exception-based snapshot failure flows.
81
+ #
82
+ # `Snapshot#wait` normally returns `SimpleResult::Failure` on failed
83
+ # snapshots. This class is reserved for callers who opt into exception
84
+ # semantics in future API versions.
85
+ class SnapshotFailedError < Error
86
+ # @return [String] snapshot ID
87
+ attr_reader :snapshot_id
88
+
89
+ # @return [Hash, nil] failure details returned by Bright Data
90
+ attr_reader :details
91
+
92
+ # @param message [String] human-readable error message
93
+ # @param snapshot_id [String] snapshot ID
94
+ # @param details [Hash, nil] failure details returned by Bright Data
95
+ def initialize(message, snapshot_id:, details: nil)
96
+ super(message)
97
+ @snapshot_id = snapshot_id
98
+ @details = details
99
+ end
100
+ end
101
+ end
data/lib/brightdata/http.rb ADDED
@@ -0,0 +1,195 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "net/http"
5
+ require "openssl"
6
+ require "uri"
7
+
8
+ module BrightData
9
+ # Thin Net::HTTP wrapper. Single point of egress for the gem.
10
+ #
11
+ # Optional request tracing lives in {LiveTrace}, kept out of this class so the
12
+ # request path stays a small, readable Net::HTTP shim.
13
+ class HTTP # rubocop:disable Metrics/ClassLength -- deliberately one cohesive HTTP shim; splitting it would scatter the request path
14
+ # @return [String] default Bright Data API base URL
15
+ BASE_URL = "https://api.brightdata.com"
16
+
17
+ # Must be greater than Bright Data's `/scrape` 60-second API cap.
18
+ # @return [Integer] default socket read timeout in seconds
19
+ DEFAULT_TIMEOUT = 90
20
+
21
+ # @param api_token [String] Bright Data API token
22
+ # @param base_url [String] override for testing
23
+ # @param open_timeout [Integer] TCP open timeout in seconds
24
+ # @param read_timeout [Integer] socket read timeout in seconds
25
+ # @param logger [Logger, nil] optional logger for debug request traces
26
+ # @raise [BrightData::ConfigurationError] if `api_token` is nil or empty
27
+ def initialize(api_token:, base_url: BASE_URL, open_timeout: 10, read_timeout: DEFAULT_TIMEOUT, logger: nil)
28
+ raise ConfigurationError, "api_token is required" if api_token.nil? || api_token.empty? # only nil and "" are rejected; a whitespace-only token passes this check
29
+
30
+ @api_token = api_token
31
+ @base_url = base_url
32
+ @open_timeout = open_timeout
33
+ @read_timeout = read_timeout
34
+ @logger = logger
35
+ end
36
+
37
+ # POST a JSON body.
38
+ #
39
+ # @param path [String] API path
40
+ # @param query [Hash] query string params
41
+ # @param body [Hash, nil] JSON body to encode
42
+ # @return [Hash, Array, nil] parsed JSON body
43
+ # @raise [BrightData::AuthError, BrightData::RateLimitError, BrightData::ServerError, BrightData::HTTPError]
44
+ def post(path:, query: {}, body: nil)
45
+ request(method: :post, path:, query:, body:)
46
+ end
47
+
48
+ # GET a path.
49
+ #
50
+ # @param path [String] API path
51
+ # @param query [Hash] query string params
52
+ # @return [Hash, Array, nil] parsed JSON body
53
+ # @raise [BrightData::AuthError, BrightData::RateLimitError, BrightData::ServerError, BrightData::HTTPError]
54
+ def get(path:, query: {})
55
+ request(method: :get, path:, query:)
56
+ end
57
+
58
+ private
59
+
60
+ def request(method:, path:, query: {}, body: nil) # builds, performs, traces and logs one request, then parses the response
61
+ uri = build_uri(path:, query:)
62
+ trace = LiveTrace.for(LiveTrace::Request.new(method:, path:, query:, body:, uri:))
63
+
64
+ response, duration = timed_perform(uri:, req: build_request(method:, uri:, body:))
65
+ trace.record_response(response:, duration:)
66
+
67
+ log_request(method:, path:, status: response.code.to_i, duration:)
68
+
69
+ handle_response(response) # parsed body on 2xx, otherwise raises the mapped error
70
+ rescue StandardError => e
71
+ trace&.record_error(e) # safe navigation: trace is still nil if build_uri or LiveTrace.for raised
72
+ raise # re-raise the same exception after recording it
73
+ end
74
+
75
+ def timed_perform(uri:, req:) # returns [response, elapsed seconds as a Float]
76
+ started = Process.clock_gettime(Process::CLOCK_MONOTONIC) # monotonic clock: unaffected by wall-clock adjustments
77
+
78
+ response = perform(uri:, req:)
79
+
80
+ [response, Process.clock_gettime(Process::CLOCK_MONOTONIC) - started]
81
+ end
82
+
83
+ def build_uri(path:, query:) # returns a URI for base URL + path, with the query string appended when present
84
+ uri = URI.parse("#{@base_url}#{path}") # NOTE(review): plain concatenation; assumes path starts with "/" and base_url has no trailing slash; confirm against callers
85
+ uri.query = URI.encode_www_form(query) unless query.empty? # skip so an empty query adds no trailing "?"
86
+
87
+ uri
88
+ end
89
+
90
+ def build_request(method:, uri:, body:) # returns a Net::HTTP request object with headers and optional JSON body
91
+ klass = if method == :post
92
+ Net::HTTP::Post
93
+ else
94
+ Net::HTTP::Get # any method other than :post is sent as GET
95
+ end
96
+ req = klass.new(uri)
97
+
98
+ apply_headers(req)
99
+ req.body = JSON.generate(body) if body # nil body: the request is sent without a payload
100
+
101
+ req
102
+ end
103
+
104
+ def apply_headers(req) # mutates req in place; the same three headers go on every request
105
+ req["Authorization"] = "Bearer #{@api_token}"
106
+ req["Content-Type"] = "application/json" # also set on body-less GET requests
107
+ req["Accept"] = "application/json"
108
+ end
109
+
110
+ def perform(uri:, req:) # rubocop:disable Metrics/MethodLength -- This is a simple method, the lines are from the Net::HTTP params
111
+ Net::HTTP.start(
112
+ uri.host,
113
+ uri.port,
114
+ use_ssl: uri.scheme == "https",
115
+ open_timeout: @open_timeout,
116
+ read_timeout: @read_timeout
117
+ ) do |http|
118
+ http.request(req)
119
+ end
120
+ rescue Net::OpenTimeout, Net::ReadTimeout => e
121
+ raise HTTPError.new("Timeout: #{e.message}", status: nil, body: nil)
122
+ rescue SocketError, SystemCallError, OpenSSL::SSL::SSLError, IOError => e
123
+ raise HTTPError.new("Connection failed: #{e.message}", status: nil, body: nil)
124
+ end
125
+
126
+ def handle_response(response) # returns the parsed body for 2xx, raises the mapped error for anything else
127
+ status = response.code.to_i # Net::HTTP exposes the status code as a String
128
+ return parse_body(response) if (200..299).cover?(status)
129
+
130
+ raise error_for(response, status)
131
+ end
132
+
133
+ # Factory mapping an error status to its exception. A dispatch at one level
134
+ # of abstraction, so it stays a single `case`.
135
+ def error_for(response, status) # rubocop:disable Metrics/MethodLength -- A case should be together all the time
136
+ body = response.body.to_s # to_s: tolerate a nil body
137
+ case status
138
+ when 401, 403
139
+ AuthError.new("Bright Data API rejected the token (status #{status}): #{body}") # AuthError is not an HTTPError, so status and body travel only in the message
140
+ when 429
141
+ RateLimitError.new(
142
+ "Bright Data rate limit hit (status 429)",
143
+ status:,
144
+ body:,
145
+ response:,
146
+ retry_after: parse_retry_after(response)
147
+ )
148
+ when 500..599
149
+ ServerError.new("Bright Data server error (status #{status})", status:, body:, response:)
150
+ else
151
+ HTTPError.new("Unexpected HTTP status #{status}", status:, body:, response:) # every remaining non-2xx status
152
+ end
153
+ end
154
+
155
+ def parse_body(response) # parses the body as JSON with symbolized keys; nil when the body is empty
156
+ text = response.body.to_s
157
+ return nil if text.empty?
158
+
159
+ JSON.parse(text, symbolize_names: true)
160
+ rescue JSON::ParserError => e # not a single JSON document
161
+ return parse_json_lines(text, response:) if json_lines?(text, response:) # fall back to JSON Lines when the response looks like it
162
+
163
+ raise HTTPError.new("Invalid JSON response: #{e.message}", status: response.code.to_i, body: text, response:)
164
+ end
165
+
166
+ def json_lines?(text, response:) # heuristic: a Content-Type containing "jsonl", or simply a body with more than one line
167
+ response["content-type"].to_s.include?("jsonl") || text.lines.count > 1
168
+ end
169
+
170
+ def parse_json_lines(text, response:) # parses one JSON document per non-blank line and returns them as an Array
171
+ text.each_line.filter_map do |line|
172
+ stripped = line.strip
173
+ next if stripped.empty? # next yields nil, which filter_map drops
174
+
175
+ JSON.parse(stripped, symbolize_names: true) # NOTE: filter_map also drops lines that parse to null or false
176
+ end
177
+ rescue JSON::ParserError => e # a single bad line fails the whole response
178
+ raise HTTPError.new("Invalid JSONL response: #{e.message}", status: response.code.to_i, body: text, response:)
179
+ end
180
+
181
+ def parse_retry_after(response) # Retry-After as whole seconds, or nil when absent or not a plain integer
182
+ raw = response["retry-after"] || response["Retry-After"]
183
+ Integer(raw, 10) if raw&.match?(/\A\d+\z/) # digits only, so an HTTP-date value yields nil; base 10 keeps a leading zero from being read as octal
184
+ end
185
+
186
+ def log_request(method:, path:, status:, duration:) # emits one debug line per request; no-op without a logger
187
+ return unless @logger
188
+
189
+ @logger.debug do # block form: stdlib Logger only evaluates it when debug output is enabled (assumes @logger is Logger-compatible)
190
+ format("[brightdata] %<method>s %<path>s -> %<status>d in %<duration>.3fs",
191
+ method: method.upcase, path:, status:, duration:)
192
+ end
193
+ end
194
+ end
195
+ end
data/lib/brightdata/linkedin/companies.rb ADDED
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BrightData
4
+ module LinkedIn
5
+ # `client.linkedin.companies` endpoint family for LinkedIn companies by URL.
6
+ #
7
+ # @example Trigger an async collection
8
+ # snapshot = client.linkedin.companies.trigger(urls: ["https://www.linkedin.com/company/example/"])
9
+ # @example Scrape synchronously
10
+ # companies = client.linkedin.companies.scrape(urls: ["https://www.linkedin.com/company/example/"])
11
+ #
12
+ # @!method trigger(urls:)
13
+ # @param urls [Array<String>] LinkedIn company URLs
14
+ # @return [BrightData::Snapshot]
15
+ # @raise [BrightData::ArgumentError] if `urls` is not an Array
16
+ # @!method scrape(urls:)
17
+ # @param urls [Array<String>] LinkedIn company URLs
18
+ # @return [Array<BrightData::LinkedIn::Types::Company>]
19
+ # @raise [BrightData::ArgumentError] if `urls` is not an Array
20
+ # @raise [BrightData::ScrapeTimeoutError] when results exceed Bright Data's synchronous cap
21
+ class Companies
22
+ include Endpoint
23
+
24
+ endpoint(
25
+ dataset_key: :companies_collect_by_url,
26
+ input: Types::CompanyUrlInput,
27
+ result: Types::Company,
28
+ param: :urls
29
+ )
30
+ end
31
+ end
32
+ end