birdsong 0.2.3 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/birdsong/monkeypatch.rb +1 -1
- data/lib/birdsong/scrapers/scraper.rb +46 -43
- data/lib/birdsong/user.rb +3 -3
- data/lib/birdsong/version.rb +1 -1
- data/lib/birdsong.rb +1 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ebf200672c8ac8965aa8e61af1346f32724a1b5244cebda6dff9e74efbdaea50
|
4
|
+
data.tar.gz: b73a8da99d9fdf61ccb62e2ddd474e2267ca4e5057a29f642d36d8a9e4f8837c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 686cf3df6f5dfac9fbe71de0c599b5581057ef8f5e63c93dfda4c6ab58e5b775a475dda944b41f2d0fee2a97acb1e478b301969c9af3ee4c9540636bee86f851
|
7
|
+
data.tar.gz: 556899f220a43bd05e3fed8b214c77312d48f33cc4d029c8d21c9a919cf2d21f046821d9b883014fa1f966353b926bb2f9510a0dc55f850c9f293fb42c0e0a45
|
data/lib/birdsong/monkeypatch.rb
CHANGED
@@ -41,7 +41,7 @@ module SeleniumMonkeypatch
|
|
41
41
|
data[:sessionId] = @session_id if @session_id
|
42
42
|
message = @ws.send_cmd(**data)
|
43
43
|
if message.nil? == false && message["error"] && (method != "Fetch.continueRequest")
|
44
|
-
raise Birdsong::
|
44
|
+
raise Birdsong::WebDriverError, error_message(message["error"])
|
45
45
|
end
|
46
46
|
|
47
47
|
message
|
@@ -94,6 +94,7 @@ module Birdsong
|
|
94
94
|
end
|
95
95
|
rescue Selenium::WebDriver::Error::WebDriverError
|
96
96
|
# Deliberately swallow these errors
|
97
|
+
rescue Birdsong::WebDriverError
|
97
98
|
end
|
98
99
|
|
99
100
|
# Now that the intercept is set up, we visit the page we want
|
@@ -108,8 +109,10 @@ module Birdsong
|
|
108
109
|
end
|
109
110
|
|
110
111
|
page.driver.execute_script("window.stop();")
|
112
|
+
|
111
113
|
raise Birdsong::NoTweetFoundError if response_body.nil?
|
112
114
|
Oj.load(response_body)
|
115
|
+
rescue Birdsong::WebDriverError
|
113
116
|
end
|
114
117
|
|
115
118
|
private
|
@@ -140,49 +143,49 @@ module Birdsong
|
|
140
143
|
Capybara.current_driver = :selenium
|
141
144
|
end
|
142
145
|
|
143
|
-
def login
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
# We don't have to login if we already are
|
156
|
-
begin
|
157
|
-
|
158
|
-
rescue Capybara::ElementNotFound; end
|
159
|
-
|
160
|
-
# Check if we're redirected to a login page, if we aren't we're already logged in
|
161
|
-
return unless page.has_xpath?('//*[@id="loginForm"]/div/div[3]/button')
|
162
|
-
|
163
|
-
# Try to log in
|
164
|
-
loop_count = 0
|
165
|
-
while loop_count < 5 do
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
end
|
177
|
-
|
178
|
-
# Sometimes Instagram just... doesn't let you log in
|
179
|
-
raise "Instagram not accessible" if loop_count == 5
|
180
|
-
|
181
|
-
# No we don't want to save our login credentials
|
182
|
-
begin
|
183
|
-
|
184
|
-
rescue Capybara::ElementNotFound; end
|
185
|
-
end
|
146
|
+
# def login
|
147
|
+
# # Reset the sessions so that there's nothing lying around
|
148
|
+
# page.quit
|
149
|
+
|
150
|
+
# # Check if we're on a Twitter page already; if not, visit it.
|
151
|
+
# unless page.driver.browser.current_url.include? "twitter.com"
|
152
|
+
# # There seems to be a bug in the Linux ARM64 version of chromedriver where this will properly
|
153
|
+
# # navigate but then time out, crashing everything. So instead we check and raise the error when
|
154
|
+
# # that then fails again.
|
155
|
+
# page.driver.browser.navigate.to("https://twitter.com")
|
156
|
+
# end
|
157
|
+
|
158
|
+
# # We don't have to login if we already are
|
159
|
+
# begin
|
160
|
+
# return if find_field("Search", wait: 10).present?
|
161
|
+
# rescue Capybara::ElementNotFound; end
|
162
|
+
|
163
|
+
# # Check if we're redirected to a login page, if we aren't we're already logged in
|
164
|
+
# return unless page.has_xpath?('//*[@id="loginForm"]/div/div[3]/button')
|
165
|
+
|
166
|
+
# # Try to log in
|
167
|
+
# loop_count = 0
|
168
|
+
# while loop_count < 5 do
|
169
|
+
# fill_in("username", with: ENV["TWITTER_USER_NAME"])
|
170
|
+
# fill_in("password", with: ENV["TWITTER_PASSWORD"])
|
171
|
+
|
172
|
+
# begin
|
173
|
+
# click_button("Log in", exact_text: true) # Note: "Log in" (lowercase `in`) instead redirects to Facebook's login page
|
174
|
+
# rescue Capybara::ElementNotFound; end # If we can't find it don't break horribly, just keep waiting
|
175
|
+
|
176
|
+
# break unless has_css?('p[data-testid="login-error-message"', wait: 10)
|
177
|
+
# loop_count += 1
|
178
|
+
# sleep(rand * 10.3)
|
179
|
+
# end
|
180
|
+
|
181
|
+
# # Sometimes Twitter just... doesn't let you log in
|
182
|
+
# raise "Instagram not accessible" if loop_count == 5
|
183
|
+
|
184
|
+
# # No, we don't want to save our login credentials
|
185
|
+
# begin
|
186
|
+
# click_on("Save Info")
|
187
|
+
# rescue Capybara::ElementNotFound; end
|
188
|
+
# end
|
186
189
|
|
187
190
|
def fetch_image(url)
|
188
191
|
request = Typhoeus::Request.new(url, followlocation: true)
|
data/lib/birdsong/user.rb
CHANGED
@@ -30,7 +30,7 @@ module Birdsong
|
|
30
30
|
def parse(user_object)
|
31
31
|
@id = user_object[:id]
|
32
32
|
@name = user_object[:name]
|
33
|
-
@username = user_object[:
|
33
|
+
@username = user_object[:username]
|
34
34
|
@created_at = DateTime.parse(user_object[:sign_up_date])
|
35
35
|
@location = user_object[:location]
|
36
36
|
|
@@ -42,8 +42,8 @@ module Birdsong
|
|
42
42
|
@url = "https://www.twitter.com/#{@username}" if @url.nil?
|
43
43
|
|
44
44
|
@followers_count = user_object[:followers_count]
|
45
|
-
@following_count = user_object[:
|
46
|
-
@tweet_count = user_object[:
|
45
|
+
@following_count = user_object[:following_count]
|
46
|
+
@tweet_count = user_object[:tweet_count]
|
47
47
|
@listed_count = user_object[:listed_count]
|
48
48
|
@verified = user_object[:verified] # this will always be `false` but we're keeping it here for compatibility
|
49
49
|
@profile_image_file_name = Birdsong.retrieve_media(@profile_image_url)
|
data/lib/birdsong/version.rb
CHANGED
data/lib/birdsong.rb
CHANGED
@@ -23,6 +23,7 @@ module Birdsong
|
|
23
23
|
class InvalidIdError < Error; end
|
24
24
|
class InvalidMediaTypeError < Error; end
|
25
25
|
class NoTweetFoundError < Error; end
|
26
|
+
class WebDriverError < Error; end
|
26
27
|
class RateLimitExceeded < Error
|
27
28
|
attr_reader :rate_limit
|
28
29
|
attr_reader :rate_remaining
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: birdsong
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christopher Guess
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-04-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: typhoeus
|
@@ -200,7 +200,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
200
200
|
- !ruby/object:Gem::Version
|
201
201
|
version: '0'
|
202
202
|
requirements: []
|
203
|
-
rubygems_version: 3.4.
|
203
|
+
rubygems_version: 3.4.20
|
204
204
|
signing_key:
|
205
205
|
specification_version: 4
|
206
206
|
summary: A gem to interface with Twitter's API V2
|