forki 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/forki/scrapers/post_scraper.rb +5 -1
- data/lib/forki/scrapers/scraper.rb +40 -8
- data/lib/forki/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67f9dde3202683b68acd00f10ec572ba80bb4a749fa369251c5e512bb8304393
|
4
|
+
data.tar.gz: 55f099e3e63f174c4ae00aef56c27c77a44b5b49414dcfb7f37e32ae8bb9f47f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35db430333dcc95259f49d1159e976f3d969b648f80c4af9a6b87c0ae0ff4ae67462f1942797da212861c26aeab7ee982f76b86e3af8563dca561e57d012aaa3
|
7
|
+
data.tar.gz: 40f0be4238a4b0bfd7fd23309403df139f82e728739211226c753e52ee96cb7ce2300ee4c0c81539c90761f05d8054b2f7a1b1d1e16cd9cd2744263df4ebf16e
|
@@ -100,7 +100,11 @@ module Forki
|
|
100
100
|
begin
|
101
101
|
find("span", wait: 5, text: "This Video Isn't Available Anymore", exact_text: false)
|
102
102
|
rescue Capybara::ElementNotFound, Selenium::WebDriver::Error::StaleElementReferenceError
|
103
|
-
|
103
|
+
begin
|
104
|
+
find("span", wait: 5, text: "This Page Isn't Available", exact_text: false)
|
105
|
+
rescue Capybara::ElementNotFound, Selenium::WebDriver::Error::StaleElementReferenceError
|
106
|
+
return true
|
107
|
+
end
|
104
108
|
end
|
105
109
|
end
|
106
110
|
|
@@ -112,18 +112,41 @@ module Forki
|
|
112
112
|
|
113
113
|
url ||= "https://www.facebook.com"
|
114
114
|
|
115
|
-
|
116
115
|
page.driver.browser.navigate.to(url) # Visit the url passed in or the facebook homepage if nothing is
|
117
116
|
|
118
117
|
# Look for "login_form" box, which throws an error if not found. So we catch it and run the rest of the tests
|
119
118
|
begin
|
120
119
|
login_form = first(id: "login_form", wait: 5)
|
121
120
|
rescue Capybara::ElementNotFound
|
122
|
-
|
121
|
+
begin
|
122
|
+
login_form = find(:xpath, '//form[@data-testid="royal_login_form"]')
|
123
|
+
rescue Capybara::ElementNotFound
|
124
|
+
return unless page.title.downcase.include?("facebook - log in")
|
125
|
+
end
|
123
126
|
end
|
124
127
|
|
125
|
-
# Since we're not logged in, let's do that
|
126
|
-
|
128
|
+
# Since we're not logged in, let's do that quickly
|
129
|
+
if login_form.nil?
|
130
|
+
page.driver.browser.navigate.to("https://www.facebook.com")
|
131
|
+
|
132
|
+
# Find the login form... again (Yes, we could extract this out, but it's only ever used
|
133
|
+
# here, so it's not worth the effort)
|
134
|
+
begin
|
135
|
+
login_form = first(id: "login_form", wait: 5)
|
136
|
+
rescue Capybara::ElementNotFound
|
137
|
+
begin
|
138
|
+
login_form = find(:xpath, '//form[@data-testid="royal_login_form"]')
|
139
|
+
rescue Capybara::ElementNotFound
|
140
|
+
return unless page.title.downcase.include?("facebook - log in")
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
if login_form.nil?
|
146
|
+
# maybe we're already logged in?
|
147
|
+
sleep(rand * 10.3)
|
148
|
+
return
|
149
|
+
end
|
127
150
|
|
128
151
|
login_form.fill_in("email", with: ENV["FACEBOOK_EMAIL"])
|
129
152
|
login_form.fill_in("pass", with: ENV["FACEBOOK_PASSWORD"])
|
@@ -154,10 +177,19 @@ module Forki
|
|
154
177
|
# If either of those two conditions are false, raises an exception
|
155
178
|
def validate_and_load_page(url)
|
156
179
|
Capybara.app_host = "https://www.facebook.com"
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
raise Forki::InvalidUrlError unless
|
180
|
+
facebook_hosts = ["facebook.com", "www.facebook.com", "web.facebook.com", "m.facebook.com"]
|
181
|
+
parsed_url = URI.parse(url)
|
182
|
+
host = parsed_url.host
|
183
|
+
raise Forki::InvalidUrlError unless facebook_hosts.include?(host)
|
184
|
+
|
185
|
+
# Replace the host with a default one to prevent redirect loops that can happen
|
186
|
+
unless parsed_url.host == "www.facebook.com"
|
187
|
+
parsed_url.host = "www.facebook.com"
|
188
|
+
url = parsed_url.to_s
|
189
|
+
end
|
190
|
+
|
191
|
+
visit "https://www.facebook.com"
|
192
|
+
login
|
161
193
|
visit url unless current_url.start_with?(url)
|
162
194
|
end
|
163
195
|
|
data/lib/forki/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ''
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -139,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
139
|
- !ruby/object:Gem::Version
|
140
140
|
version: '0'
|
141
141
|
requirements: []
|
142
|
-
rubygems_version: 3.
|
142
|
+
rubygems_version: 3.4.14
|
143
143
|
signing_key:
|
144
144
|
specification_version: 4
|
145
145
|
summary: A gem to scrape Facebook pages for archive purposes.
|