brightdata 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +38 -0
- data/LICENSE.txt +21 -0
- data/README.md +149 -0
- data/lib/brightdata/client.rb +25 -0
- data/lib/brightdata/datasets.rb +31 -0
- data/lib/brightdata/errors.rb +101 -0
- data/lib/brightdata/http.rb +195 -0
- data/lib/brightdata/linkedin/companies.rb +32 -0
- data/lib/brightdata/linkedin/endpoint.rb +195 -0
- data/lib/brightdata/linkedin/jobs.rb +83 -0
- data/lib/brightdata/linkedin/namespace.rb +32 -0
- data/lib/brightdata/linkedin/people.rb +39 -0
- data/lib/brightdata/linkedin/posts.rb +97 -0
- data/lib/brightdata/linkedin/profiles.rb +32 -0
- data/lib/brightdata/linkedin/types/company.rb +92 -0
- data/lib/brightdata/linkedin/types/company_url_input.rb +13 -0
- data/lib/brightdata/linkedin/types/discovered_profile.rb +45 -0
- data/lib/brightdata/linkedin/types/job.rb +54 -0
- data/lib/brightdata/linkedin/types/job_keyword_input.rb +44 -0
- data/lib/brightdata/linkedin/types/job_url_input.rb +13 -0
- data/lib/brightdata/linkedin/types/people_discover_input.rb +24 -0
- data/lib/brightdata/linkedin/types/post.rb +67 -0
- data/lib/brightdata/linkedin/types/post_company_url_input.rb +13 -0
- data/lib/brightdata/linkedin/types/post_profile_url_input.rb +13 -0
- data/lib/brightdata/linkedin/types/post_url_input.rb +13 -0
- data/lib/brightdata/linkedin/types/profile.rb +81 -0
- data/lib/brightdata/linkedin/types/profile_url_input.rb +14 -0
- data/lib/brightdata/live_trace.rb +124 -0
- data/lib/brightdata/result.rb +27 -0
- data/lib/brightdata/snapshot.rb +122 -0
- data/lib/brightdata/version.rb +6 -0
- data/lib/brightdata.rb +60 -0
- data/llm.md +109 -0
- metadata +193 -0
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module BrightData
|
|
4
|
+
# Namespace for Bright Data LinkedIn scraper endpoints.
|
|
5
|
+
module LinkedIn
|
|
6
|
+
# Shared trigger/scrape helpers for LinkedIn endpoint classes.
|
|
7
|
+
#
|
|
8
|
+
# Including this module gives a class the {ClassMethods#endpoint} macro,
|
|
9
|
+
# which declares a whole endpoint mode (its `#initialize`, `#trigger`,
|
|
10
|
+
# `#scrape`, input validation, and result parsing) from a few keywords.
|
|
11
|
+
#
|
|
12
|
+
# @api private
|
|
13
|
+
module Endpoint
|
|
14
|
+
# @return [String] Bright Data trigger path
|
|
15
|
+
TRIGGER_PATH = "/datasets/v3/trigger"
|
|
16
|
+
|
|
17
|
+
# @return [String] Bright Data synchronous scrape path
|
|
18
|
+
SCRAPE_PATH = "/datasets/v3/scrape"
|
|
19
|
+
|
|
20
|
+
# Hook invoked by Ruby when a class includes {Endpoint}. It extends the
# including class with {ClassMethods}, which makes the `endpoint` macro
# callable inside that class body.
#
# @param base [Class] including class
def self.included(base)
  base.extend ClassMethods
end
|
|
24
|
+
|
|
25
|
+
# Declarative endpoint builder mixed into classes that include {Endpoint}.
|
|
26
|
+
module ClassMethods
|
|
27
|
+
# Declare an endpoint mode.
#
# Generates `#initialize(http:)`, `#trigger`/`#scrape` (with a real
# keyword argument named after `param`), input validation, and result
# parsing. Pass either `input` (each value is a URL string wrapped in
# that input class) or `input_type` (each value must already be an
# instance of that type). When both are given, `input` takes precedence.
#
# @param dataset_key [Symbol] key in {BrightData::Datasets::LINKEDIN}
# @param result [#from_api] typed result class for parsing responses
# @param param [Symbol] public keyword argument name, e.g. `:urls`
# @param input [Class, nil] URL-input class wrapping each string value
# @param input_type [Class, nil] required type for each pre-built value
# @param extra_query [Hash] extra Bright Data query params
# @return [void]
# @raise [ArgumentError] if neither `input` nor `input_type` is given
def endpoint(dataset_key:, result:, param:, input: nil, input_type: nil, extra_query: {}) # rubocop:disable Metrics/ParameterLists -- each keyword names one facet of the endpoint DSL
  # Fail at declaration time, before any constant is set, instead of
  # crashing later with a NoMethodError on `nil.name`.
  raise ArgumentError, "endpoint requires either input: or input_type:" unless input || input_type

  const_set(:DATASET_KEY, dataset_key)
  # Freeze a copy so the hash object passed by the caller stays mutable.
  const_set(:EXTRA_QUERY, extra_query.dup.freeze)
  define_initialize
  define_result_parser(result)
  input ? define_url_inputs(param, input) : define_object_inputs(param, input_type)
  define_actions(param)
end
|
|
50
|
+
|
|
51
|
+
private
|
|
52
|
+
|
|
53
|
+
# Define `#initialize(http:)` on the endpoint class; the new instance
# stores the given HTTP wrapper in `@http`.
def define_initialize
  define_method(:initialize) do |http:|
    @http = http
  end
end
|
|
56
|
+
|
|
57
|
+
# Define a private `#result_parser` on the endpoint class. The method
# returns a lambda that normalizes a raw payload with `result_items` and
# maps every item through `result.from_api`.
#
# @param result [#from_api] typed result class for parsing responses
def define_result_parser(result)
  define_method(:result_parser) do
    lambda do |raw|
      result_items(raw).map { |item| result.from_api(item) }
    end
  end
  private(:result_parser)
end
|
|
63
|
+
|
|
64
|
+
# Define a private `#build_inputs` for URL-based modes: it requires an
# Array and wraps each element as `input.new(url: element)`.
#
# @param param [Symbol] public keyword name, used as the error label
# @param input [Class] URL-input class wrapping each string value
def define_url_inputs(param, input)
  name = param.to_s
  define_method(:build_inputs) do |values|
    unless values.is_a?(Array)
      raise ArgumentError, "#{name}: must be an Array, got #{values.class}"
    end

    values.map { |value| input.new(url: value) }
  end
  private(:build_inputs)
end
|
|
73
|
+
|
|
74
|
+
# Define a private `#build_inputs` for modes whose callers pass pre-built
# input objects: it requires an Array whose every element is an
# `input_type`, and returns that same Array.
#
# @param param [Symbol] public keyword name, used as the error label
# @param input_type [Module] required type for each value
# @raise [ArgumentError] if `input_type` is not a Class or Module
def define_object_inputs(param, input_type)
  raise ArgumentError, "input_type must be a Class or Module, got #{input_type.inspect}" unless input_type.is_a?(Module)

  label = param.to_s
  # Anonymous classes have no name; fall back to #inspect so building the
  # label cannot raise NoMethodError on nil.
  type_name = input_type.name&.split("::")&.last || input_type.inspect
  define_method(:build_inputs) do |values|
    raise ArgumentError, "#{label}: must be an Array, got #{values.class}" unless values.is_a?(Array)

    values.each do |item|
      raise ArgumentError, "#{label}[] must be #{type_name}, got #{item.class}" unless item.is_a?(input_type)
    end
    values
  end
  private :build_inputs
end
|
|
87
|
+
|
|
88
|
+
# `trigger`/`scrape` need a real keyword named after `param` so a wrong
# keyword raises Ruby's native ArgumentError; that requires class_eval.
#
# @param param [Symbol] public keyword argument name, e.g. `:urls`
# @raise [ArgumentError] if `param` is not a plain ASCII identifier
def define_actions(param)
  # `param` is interpolated into evaluated source below, so reject anything
  # that is not a plain identifier up front instead of surfacing a
  # SyntaxError (or evaluating unintended code) from class_eval.
  unless param.to_s.match?(/\A[a-z_][A-Za-z0-9_]*\z/)
    raise ArgumentError, "param must be a valid keyword argument name, got #{param.inspect}"
  end

  # For `param: :urls` this defines, e.g.:
  #   def trigger(urls:)
  #     trigger_with(dataset_key: DATASET_KEY, inputs: build_inputs(urls), extra_query: EXTRA_QUERY)
  #   end
  #
  #   def scrape(urls:)
  #     inputs = build_inputs(urls)
  #     return [] if inputs.empty?
  #
  #     scrape_with(dataset_key: DATASET_KEY, inputs:, extra_query: EXTRA_QUERY)
  #   end
  class_eval(<<~RUBY, __FILE__, __LINE__ + 1) # rubocop:disable Style/DocumentDynamicEvalDefinition -- the generated methods are spelled out in the comment above
    def trigger(#{param}:)
      trigger_with(dataset_key: DATASET_KEY, inputs: build_inputs(#{param}), extra_query: EXTRA_QUERY)
    end

    def scrape(#{param}:)
      inputs = build_inputs(#{param})
      return [] if inputs.empty?

      scrape_with(dataset_key: DATASET_KEY, inputs:, extra_query: EXTRA_QUERY)
    end
  RUBY
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
private
|
|
118
|
+
|
|
119
|
+
# Start an asynchronous Bright Data collection and wrap the returned
# snapshot id in a {BrightData::Snapshot}.
#
# @param dataset_key [Symbol] key in {BrightData::Datasets::LINKEDIN}
# @param inputs [Array<Data>] input objects
# @param extra_query [Hash] additional query params
# @return [BrightData::Snapshot]
def trigger_with(dataset_key:, inputs:, extra_query: {})
  query = query_for(dataset_key:, extra: extra_query)
  body = { input: serialize_inputs(inputs) }
  response = @http.post(path: TRIGGER_PATH, query:, body:)

  Snapshot.new(
    id: response.fetch(Snapshot::TRIGGER_RESPONSE_KEY),
    http: @http,
    result_parser: result_parser
  )
end
|
|
137
|
+
|
|
138
|
+
# Run a synchronous Bright Data scrape and parse the response.
#
# When the response is a Hash carrying `:snapshot_id`, no rows are parsed;
# a {BrightData::ScrapeTimeoutError} is raised instead, carrying a
# {BrightData::Snapshot} built from that response.
#
# @param dataset_key [Symbol] key in {BrightData::Datasets::LINKEDIN}
# @param inputs [Array<Data>] input objects
# @param extra_query [Hash] additional query params
# @return [Array] parsed results
# @raise [BrightData::ScrapeTimeoutError] if Bright Data returns snapshot fallback
def scrape_with(dataset_key:, inputs:, extra_query: {}) # rubocop:disable Metrics/MethodLength -- the snapshot-fallback branch belongs with the scrape it recovers from
  response = @http.post(
    path: SCRAPE_PATH,
    query: query_for(dataset_key:, extra: extra_query),
    body: { input: serialize_inputs(inputs) }
  )
  fallback = response.is_a?(Hash) && response[:snapshot_id]
  return result_parser.call(response) unless fallback

  # Hand the caller a usable snapshot so the collection can still be
  # recovered via `e.snapshot`.
  pending = Snapshot.new(
    id: response.fetch(Snapshot::TRIGGER_RESPONSE_KEY),
    http: @http,
    result_parser: result_parser
  )
  message = "Bright Data /scrape exceeded its 60s cap. Use .trigger + Snapshot#wait, " \
            "or recover via e.snapshot.wait"
  raise ScrapeTimeoutError.new(message, snapshot_id: response[:snapshot_id], snapshot: pending)
end
|
|
167
|
+
|
|
168
|
+
# Default parser, in effect unless the including class defines its own
# `#result_parser` (the `endpoint` macro does).
#
# @return [#call] identity parser that returns the raw API payload unchanged
def result_parser
  lambda do |raw|
    raw
  end
end
|
|
172
|
+
|
|
173
|
+
# Build the query params for a dataset request: `dataset_id` (resolved via
# `Datasets.id_for`) merged with `extra`; on a key clash `extra` wins.
#
# @param dataset_key [Symbol] key passed to `Datasets.id_for`
# @param extra [Hash] additional query params
# @return [Hash]
def query_for(dataset_key:, extra:)
  base = { dataset_id: Datasets.id_for(dataset_key) }
  base.merge(extra)
end
|
|
176
|
+
|
|
177
|
+
# Convert input objects into the hashes sent in the request body. An input
# that responds to `#to_api_hash` is serialized with it; any other input
# falls back to `#to_h`.
#
# @param inputs [Array] input objects
# @return [Array] one serialized value per input, in order
def serialize_inputs(inputs)
  inputs.map do |item|
    next item.to_api_hash if item.respond_to?(:to_api_hash)

    item.to_h
  end
end
|
|
182
|
+
|
|
183
|
+
# Normalize a raw payload into a flat list of items: a Hash becomes a
# one-element list, an Array is flattened by exactly one level, and
# anything else yields an empty list.
#
# @param raw [Object] parsed API payload
# @return [Array]
def result_items(raw)
  return [raw] if raw.is_a?(Hash)
  return [] unless raw.is_a?(Array)

  raw.each_with_object([]) do |entry, items|
    entry.is_a?(Array) ? items.concat(entry) : items << entry
  end
end
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module BrightData
|
|
4
|
+
module LinkedIn
|
|
5
|
+
# `client.linkedin.jobs` endpoint family for LinkedIn jobs. Each mode is
# exposed as a reader returning an object with `#trigger` and `#scrape`.
class Jobs
  # Jobs collect-by-URL mode.
  #
  # @example
  #   jobs = client.linkedin.jobs.collect_by_url.scrape(urls: ["https://www.linkedin.com/jobs/view/123/"])
  class CollectByUrl
    include Endpoint

    endpoint(
      dataset_key: :jobs_collect_by_url,
      param: :urls,
      input: Types::JobUrlInput,
      result: Types::Job
    )
  end

  # Jobs discover-by-URL mode.
  #
  # @example
  #   jobs = client.linkedin.jobs.discover_by_url.scrape(urls: ["https://www.linkedin.com/jobs/search/?keywords=ruby"])
  class DiscoverByUrl
    include Endpoint

    endpoint(
      dataset_key: :jobs_discover_by_url,
      param: :urls,
      input: Types::JobUrlInput,
      result: Types::Job,
      extra_query: { type: "discover_new", discover_by: "url" }
    )
  end

  # Jobs discover-by-keyword mode.
  #
  # @example
  #   query = BrightData::LinkedIn::Types::JobKeywordInput.new(
  #     location: "New York",
  #     keyword: "ruby",
  #     country: nil, time_range: nil, job_type: nil, experience_level: nil,
  #     remote: nil, company: nil, selective_search: nil,
  #     jobs_to_not_include: nil, location_radius: nil
  #   )
  #   jobs = client.linkedin.jobs.discover_by_keyword.scrape(queries: [query])
  class DiscoverByKeyword
    include Endpoint

    endpoint(
      dataset_key: :jobs_discover_by_keyword,
      param: :queries,
      input_type: Types::JobKeywordInput,
      result: Types::Job,
      extra_query: { type: "discover_new", discover_by: "keyword" }
    )
  end

  # @return [BrightData::LinkedIn::Jobs::CollectByUrl]
  attr_reader :collect_by_url

  # @return [BrightData::LinkedIn::Jobs::DiscoverByUrl]
  attr_reader :discover_by_url

  # @return [BrightData::LinkedIn::Jobs::DiscoverByKeyword]
  attr_reader :discover_by_keyword

  # Build one endpoint object per mode, all sharing the same HTTP wrapper.
  #
  # @param http [BrightData::HTTP] shared HTTP wrapper
  def initialize(http:)
    @collect_by_url = CollectByUrl.new(http: http)
    @discover_by_url = DiscoverByUrl.new(http: http)
    @discover_by_keyword = DiscoverByKeyword.new(http: http)
  end

  # @return [String] developer-friendly mode summary
  def inspect
    modes = %i[collect_by_url discover_by_url discover_by_keyword]
    "#<BrightData::LinkedIn::Jobs modes=#{modes}>"
  end
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module BrightData
|
|
4
|
+
module LinkedIn
|
|
5
|
+
# Accessor for the LinkedIn endpoint families. One object per family is
# built in the constructor and exposed through a reader.
class Namespace
  # @return [BrightData::LinkedIn::Profiles]
  attr_reader :profiles

  # @return [BrightData::LinkedIn::Companies]
  attr_reader :companies

  # @return [BrightData::LinkedIn::Jobs]
  attr_reader :jobs

  # @return [BrightData::LinkedIn::Posts]
  attr_reader :posts

  # @return [BrightData::LinkedIn::People]
  attr_reader :people

  # Instantiate every endpoint family with the same HTTP wrapper.
  #
  # @param http [BrightData::HTTP] shared HTTP wrapper
  def initialize(http:)
    @profiles = Profiles.new(http: http)
    @companies = Companies.new(http: http)
    @jobs = Jobs.new(http: http)
    @posts = Posts.new(http: http)
    @people = People.new(http: http)
  end
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module BrightData
|
|
4
|
+
module LinkedIn
|
|
5
|
+
# `client.linkedin.people` endpoint family for LinkedIn people discovery.
class People
  # People discover-new-profiles mode.
  #
  # @example
  #   query = BrightData::LinkedIn::Types::PeopleDiscoverInput.new(
  #     url: "https://www.linkedin.com", first_name: "Jane", last_name: "Smith"
  #   )
  #   profiles = client.linkedin.people.discover_new_profiles.scrape(queries: [query])
  class DiscoverNewProfiles
    include Endpoint

    endpoint(
      dataset_key: :people_discover_new_profiles,
      param: :queries,
      input_type: Types::PeopleDiscoverInput,
      result: Types::DiscoveredProfile
    )
  end

  # @return [BrightData::LinkedIn::People::DiscoverNewProfiles]
  attr_reader :discover_new_profiles

  # Build the single discovery mode around the shared HTTP wrapper.
  #
  # @param http [BrightData::HTTP] shared HTTP wrapper
  def initialize(http:)
    @discover_new_profiles = DiscoverNewProfiles.new(http: http)
  end

  # @return [String] developer-friendly mode summary
  def inspect
    modes = %i[discover_new_profiles]
    "#<BrightData::LinkedIn::People modes=#{modes}>"
  end
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module BrightData
|
|
4
|
+
module LinkedIn
|
|
5
|
+
# `client.linkedin.posts` endpoint family for LinkedIn posts. Each mode is
# exposed as a reader returning an object with `#trigger` and `#scrape`.
class Posts
  # Posts collect-by-URL mode.
  #
  # @example
  #   posts = client.linkedin.posts.collect_by_url.scrape(urls: ["https://www.linkedin.com/posts/example"])
  class CollectByUrl
    include Endpoint

    endpoint(
      dataset_key: :posts_collect_by_url,
      param: :urls,
      input: Types::PostUrlInput,
      result: Types::Post
    )
  end

  # Posts discover-by-profile-URL mode.
  #
  # @example
  #   posts = client.linkedin.posts.discover_by_profile_url.scrape(profile_urls: ["https://www.linkedin.com/in/example/"])
  class DiscoverByProfileUrl
    include Endpoint

    endpoint(
      dataset_key: :posts_discover_by_profile_url,
      param: :profile_urls,
      input: Types::PostProfileUrlInput,
      result: Types::Post,
      extra_query: { type: "discover_new", discover_by: "profile_url" }
    )
  end

  # Posts discover-by-URL mode.
  #
  # @example
  #   posts = client.linkedin.posts.discover_by_url.scrape(urls: ["https://www.linkedin.com/feed/"])
  class DiscoverByUrl
    include Endpoint

    endpoint(
      dataset_key: :posts_discover_by_url,
      param: :urls,
      input: Types::PostUrlInput,
      result: Types::Post,
      extra_query: { type: "discover_new", discover_by: "url" }
    )
  end

  # Posts discover-by-company-URL mode.
  #
  # @example
  #   posts = client.linkedin.posts.discover_by_company_url.scrape(company_urls: ["https://www.linkedin.com/company/example/"])
  class DiscoverByCompanyUrl
    include Endpoint

    endpoint(
      dataset_key: :posts_discover_by_company_url,
      param: :company_urls,
      input: Types::PostCompanyUrlInput,
      result: Types::Post,
      extra_query: { type: "discover_new", discover_by: "company_url" }
    )
  end

  # @return [BrightData::LinkedIn::Posts::CollectByUrl]
  attr_reader :collect_by_url

  # @return [BrightData::LinkedIn::Posts::DiscoverByProfileUrl]
  attr_reader :discover_by_profile_url

  # @return [BrightData::LinkedIn::Posts::DiscoverByUrl]
  attr_reader :discover_by_url

  # @return [BrightData::LinkedIn::Posts::DiscoverByCompanyUrl]
  attr_reader :discover_by_company_url

  # Build one endpoint object per mode, all sharing the same HTTP wrapper.
  #
  # @param http [BrightData::HTTP] shared HTTP wrapper
  def initialize(http:)
    @collect_by_url = CollectByUrl.new(http: http)
    @discover_by_profile_url = DiscoverByProfileUrl.new(http: http)
    @discover_by_url = DiscoverByUrl.new(http: http)
    @discover_by_company_url = DiscoverByCompanyUrl.new(http: http)
  end

  # @return [String] developer-friendly mode summary
  def inspect
    mode_names = %i[collect_by_url discover_by_profile_url discover_by_url discover_by_company_url]
    format("#<BrightData::LinkedIn::Posts modes=%s>", mode_names.inspect)
  end
end
|
|
96
|
+
end
|
|
97
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module BrightData
|
|
4
|
+
module LinkedIn
|
|
5
|
+
# `client.linkedin.profiles` endpoint family for LinkedIn profiles by URL.
|
|
6
|
+
#
|
|
7
|
+
# @example Trigger an async collection
|
|
8
|
+
# snapshot = client.linkedin.profiles.trigger(urls: ["https://www.linkedin.com/in/example/"])
|
|
9
|
+
# @example Scrape synchronously
|
|
10
|
+
# profiles = client.linkedin.profiles.scrape(urls: ["https://www.linkedin.com/in/example/"])
|
|
11
|
+
#
|
|
12
|
+
# @!method trigger(urls:)
|
|
13
|
+
# @param urls [Array<String>] LinkedIn profile URLs
|
|
14
|
+
# @return [BrightData::Snapshot]
|
|
15
|
+
# @raise [BrightData::ArgumentError] if `urls` is not an Array
|
|
16
|
+
# @!method scrape(urls:)
|
|
17
|
+
# @param urls [Array<String>] LinkedIn profile URLs
|
|
18
|
+
# @return [Array<BrightData::LinkedIn::Types::Profile>]
|
|
19
|
+
# @raise [BrightData::ArgumentError] if `urls` is not an Array
|
|
20
|
+
# @raise [BrightData::ScrapeTimeoutError] when results exceed Bright Data's synchronous cap
|
|
21
|
+
class Profiles
  include Endpoint

  # Declares `#initialize(http:)`, `#trigger(urls:)` and `#scrape(urls:)`
  # directly on this class through the endpoint macro.
  endpoint(
    dataset_key: :profiles_collect_by_url,
    param: :urls,
    input: Types::ProfileUrlInput,
    result: Types::Profile
  )
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module BrightData
|
|
4
|
+
module LinkedIn
|
|
5
|
+
module Types
|
|
6
|
+
# Typed representation of a LinkedIn company response.
|
|
7
|
+
#
|
|
8
|
+
# @!attribute [r] id
|
|
9
|
+
# @return [String, nil] company ID
|
|
10
|
+
# @!attribute [r] name
|
|
11
|
+
# @return [String, nil] company name
|
|
12
|
+
# @!attribute [r] country_code
|
|
13
|
+
# @return [String, nil] country code
|
|
14
|
+
# @!attribute [r] locations
|
|
15
|
+
# @return [Array<Hash>, nil] company locations
|
|
16
|
+
# @!attribute [r] followers
|
|
17
|
+
# @return [Integer, String, nil] follower count
|
|
18
|
+
# @!attribute [r] employees_in_linkedin
|
|
19
|
+
# @return [Integer, String, nil] LinkedIn employee count
|
|
20
|
+
# @!attribute [r] company_size
|
|
21
|
+
# @return [String, nil] company size
|
|
22
|
+
# @!attribute [r] industries
|
|
23
|
+
# @return [Array<String>, nil] industries
|
|
24
|
+
# @!attribute [r] specialties
|
|
25
|
+
# @return [Array<String>, nil] specialties
|
|
26
|
+
# @!attribute [r] website
|
|
27
|
+
# @return [String, nil] company website
|
|
28
|
+
# @!attribute [r] founded
|
|
29
|
+
# @return [String, Integer, nil] founded year
|
|
30
|
+
# @!attribute [r] company_id
|
|
31
|
+
# @return [String, nil] LinkedIn company ID
|
|
32
|
+
# @!attribute [r] employees
|
|
33
|
+
# @return [Array<Hash>, nil] employees
|
|
34
|
+
# @!attribute [r] similar
|
|
35
|
+
# @return [Array<Hash>, nil] similar companies
|
|
36
|
+
# @!attribute [r] updates
|
|
37
|
+
# @return [Array<Hash>, nil] updates
|
|
38
|
+
# @!attribute [r] logo
|
|
39
|
+
# @return [String, nil] logo URL
|
|
40
|
+
# @!attribute [r] image
|
|
41
|
+
# @return [String, nil] image URL
|
|
42
|
+
# @!attribute [r] headquarters
|
|
43
|
+
# @return [Hash, nil] headquarters
|
|
44
|
+
# @!attribute [r] funding
|
|
45
|
+
# @return [Hash, nil] funding details
|
|
46
|
+
# @!attribute [r] investors
|
|
47
|
+
# @return [Array<Hash>, nil] investors
|
|
48
|
+
# @!attribute [r] affiliated
|
|
49
|
+
# @return [Array<Hash>, nil] affiliated companies
|
|
50
|
+
# @!attribute [r] raw
|
|
51
|
+
# @return [Hash] full parsed API response
|
|
52
|
+
# @note Use #raw to access fields not yet typed by this gem.
|
|
53
|
+
Company = Data.define(
  *%i[
    id name country_code locations followers
    employees_in_linkedin company_size industries specialties
    website founded company_id employees similar updates
    logo image headquarters funding investors affiliated raw
  ]
) do
  # Build a company from a symbol-keyed API response. Every typed member
  # is read from the response key of the same name (missing keys become
  # nil); the response itself is kept under `raw`.
  #
  # @param hash [Hash] symbolized-key API response object
  # @return [BrightData::LinkedIn::Types::Company]
  def self.from_api(hash)
    typed = (members - [:raw]).to_h { |field| [field, hash[field]] }
    new(**typed, raw: hash)
  end
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module BrightData
|
|
4
|
+
module LinkedIn
|
|
5
|
+
module Types
|
|
6
|
+
# Immutable input value for `linkedin.companies.{trigger,scrape}`,
# carrying one company URL.
#
# @!attribute [r] url
#   @return [String] LinkedIn company URL
CompanyUrlInput = Data.define(:url)
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module BrightData
|
|
4
|
+
module LinkedIn
|
|
5
|
+
module Types
|
|
6
|
+
# Typed representation of a discovered LinkedIn profile response.
|
|
7
|
+
#
|
|
8
|
+
# @!attribute [r] url
|
|
9
|
+
# @return [String, nil] profile URL
|
|
10
|
+
# @!attribute [r] name
|
|
11
|
+
# @return [String, nil] profile name
|
|
12
|
+
# @!attribute [r] subtitle
|
|
13
|
+
# @return [String, nil] profile subtitle
|
|
14
|
+
# @!attribute [r] location
|
|
15
|
+
# @return [String, nil] profile location
|
|
16
|
+
# @!attribute [r] experience
|
|
17
|
+
# @return [String, Hash, Array, nil] experience summary
|
|
18
|
+
# @!attribute [r] education
|
|
19
|
+
# @return [String, Hash, Array, nil] education summary
|
|
20
|
+
# @!attribute [r] avatar
|
|
21
|
+
# @return [String, nil] avatar URL
|
|
22
|
+
# @!attribute [r] raw
|
|
23
|
+
# @return [Hash] full parsed API response
|
|
24
|
+
# @note Use #raw to access fields not yet typed by this gem.
|
|
25
|
+
DiscoveredProfile = Data.define(
  :url, :name, :subtitle, :location,
  :experience, :education, :avatar, :raw
) do
  # Build a discovered profile from a symbol-keyed API response. Every
  # typed member is read from the response key of the same name (missing
  # keys become nil); the response itself is kept under `raw`.
  #
  # @param hash [Hash] symbolized-key API response object
  # @return [BrightData::LinkedIn::Types::DiscoveredProfile]
  def self.from_api(hash)
    typed = (members - [:raw]).to_h { |field| [field, hash[field]] }
    new(**typed, raw: hash)
  end
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module BrightData
|
|
4
|
+
module LinkedIn
|
|
5
|
+
module Types
|
|
6
|
+
# Typed representation of a LinkedIn job response.
|
|
7
|
+
#
|
|
8
|
+
# @!attribute [r] url
|
|
9
|
+
# @return [String, nil] job URL
|
|
10
|
+
# @!attribute [r] job_posting_id
|
|
11
|
+
# @return [String, nil] job posting ID
|
|
12
|
+
# @!attribute [r] job_title
|
|
13
|
+
# @return [String, nil] job title
|
|
14
|
+
# @!attribute [r] company_name
|
|
15
|
+
# @return [String, nil] company name
|
|
16
|
+
# @!attribute [r] job_location
|
|
17
|
+
# @return [String, nil] job location
|
|
18
|
+
# @!attribute [r] job_summary
|
|
19
|
+
# @return [String, nil] job summary
|
|
20
|
+
# @!attribute [r] job_base_pay_range
|
|
21
|
+
# @return [String, nil] base pay range
|
|
22
|
+
# @!attribute [r] job_posted_time
|
|
23
|
+
# @return [String, nil] posted time
|
|
24
|
+
# @!attribute [r] company_logo
|
|
25
|
+
# @return [String, nil] company logo URL
|
|
26
|
+
# @!attribute [r] raw
|
|
27
|
+
# @return [Hash] full parsed API response
|
|
28
|
+
# @note Use #raw to access fields not yet typed by this gem.
|
|
29
|
+
Job = Data.define(
  *%i[
    url job_posting_id job_title company_name job_location
    job_summary job_base_pay_range job_posted_time company_logo raw
  ]
) do
  # Build a job from a symbol-keyed API response. Every typed member is
  # read from the response key of the same name (missing keys become nil);
  # the response itself is kept under `raw`.
  #
  # @param hash [Hash] symbolized-key API response object
  # @return [BrightData::LinkedIn::Types::Job]
  def self.from_api(hash)
    typed = (members - [:raw]).to_h { |field| [field, hash[field]] }
    new(**typed, raw: hash)
  end
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|