forki 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/forki/scrapers/post_scraper.rb +5 -1
- data/lib/forki/scrapers/scraper.rb +40 -8
- data/lib/forki/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67f9dde3202683b68acd00f10ec572ba80bb4a749fa369251c5e512bb8304393
|
4
|
+
data.tar.gz: 55f099e3e63f174c4ae00aef56c27c77a44b5b49414dcfb7f37e32ae8bb9f47f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35db430333dcc95259f49d1159e976f3d969b648f80c4af9a6b87c0ae0ff4ae67462f1942797da212861c26aeab7ee982f76b86e3af8563dca561e57d012aaa3
|
7
|
+
data.tar.gz: 40f0be4238a4b0bfd7fd23309403df139f82e728739211226c753e52ee96cb7ce2300ee4c0c81539c90761f05d8054b2f7a1b1d1e16cd9cd2744263df4ebf16e
|
@@ -100,7 +100,11 @@ module Forki
|
|
100
100
|
begin
|
101
101
|
find("span", wait: 5, text: "This Video Isn't Available Anymore", exact_text: false)
|
102
102
|
rescue Capybara::ElementNotFound, Selenium::WebDriver::Error::StaleElementReferenceError
|
103
|
-
|
103
|
+
begin
|
104
|
+
find("span", wait: 5, text: "This Page Isn't Available", exact_text: false)
|
105
|
+
rescue Capybara::ElementNotFound, Selenium::WebDriver::Error::StaleElementReferenceError
|
106
|
+
return true
|
107
|
+
end
|
104
108
|
end
|
105
109
|
end
|
106
110
|
|
@@ -112,18 +112,41 @@ module Forki
|
|
112
112
|
|
113
113
|
url ||= "https://www.facebook.com"
|
114
114
|
|
115
|
-
|
116
115
|
page.driver.browser.navigate.to(url) # Visit the url passed in or the facebook homepage if nothing is
|
117
116
|
|
118
117
|
# Look for "login_form" box, which throws an error if not found. So we catch it and run the rest of the tests
|
119
118
|
begin
|
120
119
|
login_form = first(id: "login_form", wait: 5)
|
121
120
|
rescue Capybara::ElementNotFound
|
122
|
-
|
121
|
+
begin
|
122
|
+
login_form = find(:xpath, '//form[@data-testid="royal_login_form"]')
|
123
|
+
rescue Capybara::ElementNotFound
|
124
|
+
return unless page.title.downcase.include?("facebook - log in")
|
125
|
+
end
|
123
126
|
end
|
124
127
|
|
125
|
-
# Since we're not logged in, let's do that
|
126
|
-
|
128
|
+
# Since we're not logged in, let's do that quickly
|
129
|
+
if login_form.nil?
|
130
|
+
page.driver.browser.navigate.to("https://www.facebook.com")
|
131
|
+
|
132
|
+
# Find the login form... again (Yes, we could extract this out, but it's only ever used
|
133
|
+
# here, so it's not worth the effort)
|
134
|
+
begin
|
135
|
+
login_form = first(id: "login_form", wait: 5)
|
136
|
+
rescue Capybara::ElementNotFound
|
137
|
+
begin
|
138
|
+
login_form = find(:xpath, '//form[@data-testid="royal_login_form"]')
|
139
|
+
rescue Capybara::ElementNotFound
|
140
|
+
return unless page.title.downcase.include?("facebook - log in")
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
if login_form.nil?
|
146
|
+
# maybe we're already logged in?
|
147
|
+
sleep(rand * 10.3)
|
148
|
+
return
|
149
|
+
end
|
127
150
|
|
128
151
|
login_form.fill_in("email", with: ENV["FACEBOOK_EMAIL"])
|
129
152
|
login_form.fill_in("pass", with: ENV["FACEBOOK_PASSWORD"])
|
@@ -154,10 +177,19 @@ module Forki
|
|
154
177
|
# If either of those two conditions are false, raises an exception
|
155
178
|
def validate_and_load_page(url)
|
156
179
|
Capybara.app_host = "https://www.facebook.com"
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
raise Forki::InvalidUrlError unless
|
180
|
+
facebook_hosts = ["facebook.com", "www.facebook.com", "web.facebook.com", "m.facebook.com"]
|
181
|
+
parsed_url = URI.parse(url)
|
182
|
+
host = parsed_url.host
|
183
|
+
raise Forki::InvalidUrlError unless facebook_hosts.include?(host)
|
184
|
+
|
185
|
+
# Replace the host with a default one to prevent redirect loops that can happen
|
186
|
+
unless parsed_url.host == "www.facebook.com"
|
187
|
+
parsed_url.host = "www.facebook.com"
|
188
|
+
url = parsed_url.to_s
|
189
|
+
end
|
190
|
+
|
191
|
+
visit "https://www.facebook.com"
|
192
|
+
login
|
161
193
|
visit url unless current_url.start_with?(url)
|
162
194
|
end
|
163
195
|
|
data/lib/forki/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ''
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -139,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
139
|
- !ruby/object:Gem::Version
|
140
140
|
version: '0'
|
141
141
|
requirements: []
|
142
|
-
rubygems_version: 3.
|
142
|
+
rubygems_version: 3.4.14
|
143
143
|
signing_key:
|
144
144
|
specification_version: 4
|
145
145
|
summary: A gem to scrape Facebook pages for archive purposes.
|