forki 0.1.1 → 0.1.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cb9eb80820ccf86f9a99d1c757940a935137d834a2bd76d855deaeee470840a1
4
- data.tar.gz: 667f0600afdf53ced62b57def3021a6ccc0cf2a194142e2f23beb76c8384a35a
3
+ metadata.gz: a0667614af238aeb8089c1af60794918c0986b7f5e12fa6fe96d33a2a5c1a06f
4
+ data.tar.gz: 5668a5a6056bdf9bdd9c9fd8f119fa5ed754c5ee0b152d2764b26e9f9d7a8804
5
5
  SHA512:
6
- metadata.gz: 78aa5d9bf07a1f5790f8e1705056956b30a7d2854723b74fdfd0a7612be2c3988916333bfc5fc15dff62d62fda1d01452f375765b4e1ddc6633148372993ebbb
7
- data.tar.gz: 21d6c81749a1f1fe160508e9efe53ca6abc2be59b01eb31684a500df79b9092647de47d1545ea21c0c701c5d1e34f2f40c35dcf0765bdfce6d7c72ec60feee2b
6
+ metadata.gz: 509ffb30dada2666236ed0900e1def8a61413ee3ba2e8705e31bd01422f8d2baf2cd24479faf906b6374602fcb0c1dc7bb334b7e02a560c798eba6efab30a2c9
7
+ data.tar.gz: 3f79b804b5505222e06a49352807d92c95f13a8735bedcd1b32cbf079be72b0922052014ef82da8771db108f7b0b674cd132caf7d377084c57cf0d245eb049e1
data/Gemfile CHANGED
@@ -19,3 +19,5 @@ gem "dotenv", "~> 2.7.6"
19
19
  gem "byebug"
20
20
 
21
21
  gem "rack", "2.2.4"
22
+
23
+ gem "curb", "~> 1.0", ">= 1.0.5"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- forki (0.1.0)
4
+ forki (0.1.1)
5
5
  apparition
6
6
  capybara
7
7
  oj
@@ -48,6 +48,7 @@ GEM
48
48
  xpath (~> 3.2)
49
49
  concurrent-ruby (1.2.2)
50
50
  crass (1.0.6)
51
+ curb (1.0.5)
51
52
  dotenv (2.7.6)
52
53
  erubi (1.12.0)
53
54
  ethon (0.16.0)
@@ -150,6 +151,7 @@ PLATFORMS
150
151
 
151
152
  DEPENDENCIES
152
153
  byebug
154
+ curb (~> 1.0, >= 1.0.5)
153
155
  dotenv (~> 2.7.6)
154
156
  forki!
155
157
  minitest (~> 5.0)
@@ -62,8 +62,16 @@ module Forki
62
62
 
63
63
  def check_if_post_is_image(graphql_objects)
64
64
  graphql_objects.any? do |graphql_object| # if any GraphQL objects contain the top-level keys above, return true
65
- true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
66
- true unless graphql_object.fetch("currMedia", nil).nil?
65
+ return true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
66
+ return true unless graphql_object.fetch("currMedia", nil).nil?
67
+
68
+ # This is a complicated form for `web.facebook.com` posts
69
+
70
+ if !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil?
71
+ if graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].count.positive?
72
+ return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "all_subattachments", "nodes")&.first&.dig("media", "image", "uri").nil?
73
+ end
74
+ end
67
75
  end
68
76
  end
69
77
 
@@ -220,26 +228,53 @@ module Forki
220
228
 
221
229
  # Extracts data from an image post by parsing GraphQL strings as seen in the video post scraper above
222
230
  def extract_image_post_data(graphql_object_array)
223
- graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
224
- curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
225
- creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
231
+ # This is a weird one-off style
232
+ graphql_object = graphql_object_array.find { |graphql_object| !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil? }
233
+ unless graphql_object.nil? || graphql_object.count == 0
234
+ attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
235
+
236
+ reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
237
+ id = graphql_object["node"]["post_id"]
238
+ num_comments = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["share_count"]["count"]
239
+ reshare_warning = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
240
+ image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
241
+ text = graphql_object["node"]["comet_sections"]["content"]["story"]["message"]["text"]
242
+ profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
243
+ created_at = graphql_object["node"]["comet_sections"]["content"]["story"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
244
+ has_video = false
245
+ else
226
246
 
227
- feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
228
- share_count_object = feedback_object.fetch("share_count", {})
247
+ graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
248
+ curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
249
+ creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
250
+
251
+ feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
252
+ share_count_object = feedback_object.fetch("share_count", {})
253
+
254
+ poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
229
255
 
230
- poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
256
+ reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
257
+ id = curr_media_object["currMedia"]["id"],
258
+ num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
259
+ num_shares = share_count_object.fetch("count", nil),
260
+ reshare_warning = feedback_object["should_show_reshare_warning"],
261
+ image_url = curr_media_object["currMedia"]["image"]["uri"],
262
+ text = (creation_story_object["message"] || {}).fetch("text", nil),
263
+ profile_link = poster["url"],
264
+ created_at = curr_media_object["currMedia"]["created_time"],
265
+ has_video = false
231
266
 
232
- reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
267
+ end
233
268
  post_details = {
234
- id: curr_media_object["currMedia"]["id"],
235
- num_comments: feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
236
- num_shares: share_count_object.fetch("count", nil),
237
- reshare_warning: feedback_object["should_show_reshare_warning"],
238
- image_url: curr_media_object["currMedia"]["image"]["uri"],
239
- text: (creation_story_object["message"] || {}).fetch("text", nil),
240
- profile_link: poster["url"],
241
- created_at: curr_media_object["currMedia"]["created_time"],
242
- has_video: false
269
+ id: id,
270
+ num_comments: num_comments,
271
+ num_shares: num_shares,
272
+ reshare_warning: reshare_warning,
273
+ image_url: image_url,
274
+ text: text,
275
+ profile_link: profile_link,
276
+ created_at: created_at,
277
+ has_video: has_video
243
278
  }
244
279
  post_details[:image_file] = Forki.retrieve_media(post_details[:image_url])
245
280
  post_details[:reactions] = reaction_counts
@@ -349,8 +384,9 @@ module Forki
349
384
  page.quit
350
385
 
351
386
  post_data
352
- rescue Net::ReadTimeout
353
- # Eat it?
387
+ rescue Net::ReadTimeout => e
388
+ puts "Time out error: #{e}"
389
+ puts e.backtrace
354
390
  rescue StandardError => e
355
391
  raise e
356
392
  ensure
@@ -6,6 +6,7 @@ require "dotenv/load"
6
6
  require "oj"
7
7
  require "selenium-webdriver"
8
8
  require "open-uri"
9
+ require "selenium/webdriver/remote/http/curb"
9
10
 
10
11
  options = Selenium::WebDriver::Options.chrome(exclude_switches: ["enable-automation"])
11
12
  options.add_argument("--start-maximized")
@@ -20,8 +21,8 @@ options.add_argument("--remote-debugging-port=9222")
20
21
  options.add_argument("--user-data-dir=/tmp/tarun_forki_#{SecureRandom.uuid}")
21
22
 
22
23
  Capybara.register_driver :selenium_forki do |app|
23
- client = Selenium::WebDriver::Remote::Http::Default.new
24
- client.read_timeout = 60 # Don't wait 60 seconds to return Net::ReadTimeoutError. We'll retry through Hypatia after 10 seconds
24
+ client = Selenium::WebDriver::Remote::Http::Curb.new
25
+ # client.read_timeout = 60 # Don't wait 60 seconds to return Net::ReadTimeoutError. We'll retry through Hypatia after 10 seconds
25
26
  Capybara::Selenium::Driver.new(app, browser: :chrome, options: options, http_client: client)
26
27
  end
27
28
 
@@ -97,8 +98,8 @@ module Forki
97
98
  options.add_argument("--user-data-dir=/tmp/tarun_forki_#{SecureRandom.uuid}")
98
99
 
99
100
  Capybara.register_driver :selenium_forki do |app|
100
- client = Selenium::WebDriver::Remote::Http::Default.new
101
- client.read_timeout = 60 # Don't wait 60 seconds to return Net::ReadTimeoutError. We'll retry through Hypatia after 10 seconds
101
+ client = Selenium::WebDriver::Remote::Http::Curb.new
102
+ # client.read_timeout = 60 # Don't wait 60 seconds to return Net::ReadTimeoutError. We'll retry through Hypatia after 10 seconds
102
103
  Capybara::Selenium::Driver.new(app, browser: :chrome, options: options, http_client: client)
103
104
  end
104
105
 
@@ -110,7 +111,9 @@ module Forki
110
111
  raise MissingCredentialsError if ENV["FACEBOOK_EMAIL"].nil? || ENV["FACEBOOK_PASSWORD"].nil?
111
112
 
112
113
  url ||= "https://www.facebook.com"
113
- visit(url) # Visit the url passed in or the facebook homepage if nothing is
114
+
115
+
116
+ page.driver.browser.navigate.to(url) # Visit the url passed in or the facebook homepage if nothing is
114
117
 
115
118
  # Look for "login_form" box, which throws an error if not found. So we catch it and run the rest of the tests
116
119
  begin
@@ -120,7 +123,7 @@ module Forki
120
123
  end
121
124
 
122
125
  # Since we're not logged in, let's do that quick
123
- visit("https://www.facebook.com") if login_form.nil?
126
+ page.driver.browser.navigate.to("https://www.facebook.com") if login_form.nil?
124
127
 
125
128
  login_form.fill_in("email", with: ENV["FACEBOOK_EMAIL"])
126
129
  login_form.fill_in("pass", with: ENV["FACEBOOK_PASSWORD"])
@@ -39,9 +39,14 @@ module Forki
39
39
  profile_title_section = graphql_strings.find { |gql| gql.include? "profile_tile_section_type" }
40
40
 
41
41
  json = JSON.parse(profile_title_section)
42
- followers_node = json["user"]["profile_tile_sections"]["edges"].first["node"]["profile_tile_views"]["nodes"][1]["view_style_renderer"]["view"]["profile_tile_items"]["nodes"].select do |node|
43
- node["node"]["timeline_context_item"]["timeline_context_list_item_type"] == "INTRO_CARD_FOLLOWERS"
44
- end
42
+
43
+ followers_node = []
44
+ begin
45
+ followers_node = json["user"]["profile_tile_sections"]["edges"].first["node"]["profile_tile_views"]["nodes"][1]["view_style_renderer"]["view"]["profile_tile_items"]["nodes"].select do |node|
46
+ node["node"]["timeline_context_item"]["timeline_context_list_item_type"] == "INTRO_CARD_FOLLOWERS"
47
+ end
48
+ rescue NoMethodError; end
49
+
45
50
  if followers_node.empty?
46
51
  number_of_followers = nil
47
52
  else
data/lib/forki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Forki
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.3"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-23 00:00:00.000000000 Z
11
+ date: 2023-06-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara