forki 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cb9eb80820ccf86f9a99d1c757940a935137d834a2bd76d855deaeee470840a1
4
- data.tar.gz: 667f0600afdf53ced62b57def3021a6ccc0cf2a194142e2f23beb76c8384a35a
3
+ metadata.gz: a0667614af238aeb8089c1af60794918c0986b7f5e12fa6fe96d33a2a5c1a06f
4
+ data.tar.gz: 5668a5a6056bdf9bdd9c9fd8f119fa5ed754c5ee0b152d2764b26e9f9d7a8804
5
5
  SHA512:
6
- metadata.gz: 78aa5d9bf07a1f5790f8e1705056956b30a7d2854723b74fdfd0a7612be2c3988916333bfc5fc15dff62d62fda1d01452f375765b4e1ddc6633148372993ebbb
7
- data.tar.gz: 21d6c81749a1f1fe160508e9efe53ca6abc2be59b01eb31684a500df79b9092647de47d1545ea21c0c701c5d1e34f2f40c35dcf0765bdfce6d7c72ec60feee2b
6
+ metadata.gz: 509ffb30dada2666236ed0900e1def8a61413ee3ba2e8705e31bd01422f8d2baf2cd24479faf906b6374602fcb0c1dc7bb334b7e02a560c798eba6efab30a2c9
7
+ data.tar.gz: 3f79b804b5505222e06a49352807d92c95f13a8735bedcd1b32cbf079be72b0922052014ef82da8771db108f7b0b674cd132caf7d377084c57cf0d245eb049e1
data/Gemfile CHANGED
@@ -19,3 +19,5 @@ gem "dotenv", "~> 2.7.6"
19
19
  gem "byebug"
20
20
 
21
21
  gem "rack", "2.2.4"
22
+
23
+ gem "curb", "~> 1.0", ">= 1.0.5"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- forki (0.1.0)
4
+ forki (0.1.1)
5
5
  apparition
6
6
  capybara
7
7
  oj
@@ -48,6 +48,7 @@ GEM
48
48
  xpath (~> 3.2)
49
49
  concurrent-ruby (1.2.2)
50
50
  crass (1.0.6)
51
+ curb (1.0.5)
51
52
  dotenv (2.7.6)
52
53
  erubi (1.12.0)
53
54
  ethon (0.16.0)
@@ -150,6 +151,7 @@ PLATFORMS
150
151
 
151
152
  DEPENDENCIES
152
153
  byebug
154
+ curb (~> 1.0, >= 1.0.5)
153
155
  dotenv (~> 2.7.6)
154
156
  forki!
155
157
  minitest (~> 5.0)
@@ -62,8 +62,16 @@ module Forki
62
62
 
63
63
  def check_if_post_is_image(graphql_objects)
64
64
  graphql_objects.any? do |graphql_object| # if any GraphQL objects contain the top-level keys above, return true
65
- true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
66
- true unless graphql_object.fetch("currMedia", nil).nil?
65
+ return true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
66
+ return true unless graphql_object.fetch("currMedia", nil).nil?
67
+
68
+ # This is a complicated form for `web.facebook.com` posts
69
+
70
+ if !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil?
71
+ if graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].count.positive?
72
+ return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "all_subattachments", "nodes")&.first&.dig("media", "image", "uri").nil?
73
+ end
74
+ end
67
75
  end
68
76
  end
69
77
 
@@ -220,26 +228,53 @@ module Forki
220
228
 
221
229
  # Extracts data from an image post by parsing GraphQL strings as seen in the video post scraper above
222
230
  def extract_image_post_data(graphql_object_array)
223
- graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
224
- curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
225
- creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
231
+ # This is a weird one-off style
232
+ graphql_object = graphql_object_array.find { |graphql_object| !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil? }
233
+ unless graphql_object.nil? || graphql_object.count == 0
234
+ attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
235
+
236
+ reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
237
+ id = graphql_object["node"]["post_id"]
238
+ num_comments = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["share_count"]["count"]
239
+ reshare_warning = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
240
+ image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
241
+ text = graphql_object["node"]["comet_sections"]["content"]["story"]["message"]["text"]
242
+ profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
243
+ created_at = graphql_object["node"]["comet_sections"]["content"]["story"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
244
+ has_video = false
245
+ else
226
246
 
227
- feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
228
- share_count_object = feedback_object.fetch("share_count", {})
247
+ graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
248
+ curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
249
+ creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
250
+
251
+ feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
252
+ share_count_object = feedback_object.fetch("share_count", {})
253
+
254
+ poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
229
255
 
230
- poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
256
+ reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
257
+ id = curr_media_object["currMedia"]["id"],
258
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
259
+ num_shares = share_count_object.fetch("count", nil),
260
+ reshare_warning = feedback_object["should_show_reshare_warning"],
261
+ image_url = curr_media_object["currMedia"]["image"]["uri"],
262
+ text = (creation_story_object["message"] || {}).fetch("text", nil),
263
+ profile_link = poster["url"],
264
+ created_at = curr_media_object["currMedia"]["created_time"],
265
+ has_video = false
231
266
 
232
- reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
267
+ end
233
268
  post_details = {
234
- id: curr_media_object["currMedia"]["id"],
235
- num_comments: feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
236
- num_shares: share_count_object.fetch("count", nil),
237
- reshare_warning: feedback_object["should_show_reshare_warning"],
238
- image_url: curr_media_object["currMedia"]["image"]["uri"],
239
- text: (creation_story_object["message"] || {}).fetch("text", nil),
240
- profile_link: poster["url"],
241
- created_at: curr_media_object["currMedia"]["created_time"],
242
- has_video: false
269
+ id: id,
270
+ num_comments: num_comments,
271
+ num_shares: num_shares,
272
+ reshare_warning: reshare_warning,
273
+ image_url: image_url,
274
+ text: text,
275
+ profile_link: profile_link,
276
+ created_at: created_at,
277
+ has_video: has_video
243
278
  }
244
279
  post_details[:image_file] = Forki.retrieve_media(post_details[:image_url])
245
280
  post_details[:reactions] = reaction_counts
@@ -349,8 +384,9 @@ module Forki
349
384
  page.quit
350
385
 
351
386
  post_data
352
- rescue Net::ReadTimeout
353
- # Eat it?
387
+ rescue Net::ReadTimeout => e
388
+ puts "Time out error: #{e}"
389
+ puts e.backtrace
354
390
  rescue StandardError => e
355
391
  raise e
356
392
  ensure
@@ -6,6 +6,7 @@ require "dotenv/load"
6
6
  require "oj"
7
7
  require "selenium-webdriver"
8
8
  require "open-uri"
9
+ require "selenium/webdriver/remote/http/curb"
9
10
 
10
11
  options = Selenium::WebDriver::Options.chrome(exclude_switches: ["enable-automation"])
11
12
  options.add_argument("--start-maximized")
@@ -20,8 +21,8 @@ options.add_argument("--remote-debugging-port=9222")
20
21
  options.add_argument("--user-data-dir=/tmp/tarun_forki_#{SecureRandom.uuid}")
21
22
 
22
23
  Capybara.register_driver :selenium_forki do |app|
23
- client = Selenium::WebDriver::Remote::Http::Default.new
24
- client.read_timeout = 60 # Don't wait 60 seconds to return Net::ReadTimeoutError. We'll retry through Hypatia after 10 seconds
24
+ client = Selenium::WebDriver::Remote::Http::Curb.new
25
+ # client.read_timeout = 60 # Don't wait 60 seconds to return Net::ReadTimeoutError. We'll retry through Hypatia after 10 seconds
25
26
  Capybara::Selenium::Driver.new(app, browser: :chrome, options: options, http_client: client)
26
27
  end
27
28
 
@@ -97,8 +98,8 @@ module Forki
97
98
  options.add_argument("--user-data-dir=/tmp/tarun_forki_#{SecureRandom.uuid}")
98
99
 
99
100
  Capybara.register_driver :selenium_forki do |app|
100
- client = Selenium::WebDriver::Remote::Http::Default.new
101
- client.read_timeout = 60 # Don't wait 60 seconds to return Net::ReadTimeoutError. We'll retry through Hypatia after 10 seconds
101
+ client = Selenium::WebDriver::Remote::Http::Curb.new
102
+ # client.read_timeout = 60 # Don't wait 60 seconds to return Net::ReadTimeoutError. We'll retry through Hypatia after 10 seconds
102
103
  Capybara::Selenium::Driver.new(app, browser: :chrome, options: options, http_client: client)
103
104
  end
104
105
 
@@ -110,7 +111,9 @@ module Forki
110
111
  raise MissingCredentialsError if ENV["FACEBOOK_EMAIL"].nil? || ENV["FACEBOOK_PASSWORD"].nil?
111
112
 
112
113
  url ||= "https://www.facebook.com"
113
- visit(url) # Visit the url passed in or the facebook homepage if nothing is
114
+
115
+
116
+ page.driver.browser.navigate.to(url) # Visit the url passed in or the facebook homepage if nothing is
114
117
 
115
118
  # Look for "login_form" box, which throws an error if not found. So we catch it and run the rest of the tests
116
119
  begin
@@ -120,7 +123,7 @@ module Forki
120
123
  end
121
124
 
122
125
  # Since we're not logged in, let's do that quick
123
- visit("https://www.facebook.com") if login_form.nil?
126
+ page.driver.browser.navigate.to("https://www.facebook.com") if login_form.nil?
124
127
 
125
128
  login_form.fill_in("email", with: ENV["FACEBOOK_EMAIL"])
126
129
  login_form.fill_in("pass", with: ENV["FACEBOOK_PASSWORD"])
@@ -39,9 +39,14 @@ module Forki
39
39
  profile_title_section = graphql_strings.find { |gql| gql.include? "profile_tile_section_type" }
40
40
 
41
41
  json = JSON.parse(profile_title_section)
42
- followers_node = json["user"]["profile_tile_sections"]["edges"].first["node"]["profile_tile_views"]["nodes"][1]["view_style_renderer"]["view"]["profile_tile_items"]["nodes"].select do |node|
43
- node["node"]["timeline_context_item"]["timeline_context_list_item_type"] == "INTRO_CARD_FOLLOWERS"
44
- end
42
+
43
+ followers_node = []
44
+ begin
45
+ followers_node = json["user"]["profile_tile_sections"]["edges"].first["node"]["profile_tile_views"]["nodes"][1]["view_style_renderer"]["view"]["profile_tile_items"]["nodes"].select do |node|
46
+ node["node"]["timeline_context_item"]["timeline_context_list_item_type"] == "INTRO_CARD_FOLLOWERS"
47
+ end
48
+ rescue NoMethodError; end
49
+
45
50
  if followers_node.empty?
46
51
  number_of_followers = nil
47
52
  else
data/lib/forki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Forki
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.3"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-23 00:00:00.000000000 Z
11
+ date: 2023-06-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara