RubyGems - forki - Versions diffs - 0.2.5 → 0.2.7 - Mend

forki 0.2.5 → 0.2.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml +4 -4
data/Gemfile.lock +12 -10
data/lib/forki/scrapers/post_scraper.rb +105 -26
data/lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb +1 -3
data/lib/forki/version.rb +1 -1
metadata +3 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: cd276bc782515b3b7935eab759da6cf2acdafc146798a56e3e21ee667358ad3c
-  data.tar.gz: 532d2c06542a0bdfe4e22f1a0b2ec3cf3ded6057b30de93386fad442bfa686ab
+  metadata.gz: cf93e965787eaf05b26f6ea1377775fb61ed131a52458a371336474d09e4a639
+  data.tar.gz: 729e9409bf76eb8551913f64e02d3905a878d057f045e16a64f712926d0d5cc8
 SHA512:
-  metadata.gz: 46aaf4eef616f99ca44eac48134b151af41e1755b808c2efef18fdca905956d8f5cb72d54e61070f64ae883b8e64e88f5adfb94f0d41a166eae136f0474133df
-  data.tar.gz: 36c8bc7c506c952c036f38eb655522fad7a6b4cd6bf7c3de11e78e60aec4f5c3f124d5449e1aa40425a22cacee71e7c393239092a7e6dbf057a7e46e90457f1a
+  metadata.gz: 6d710aee0bb1ae64c3796de31f85a9e39f26791bd12a653785544099d9e10629a3a239f69dff5702b80b009d1e3b6c9abfef50109e7078dcb70194f9f5c65384
+  data.tar.gz: 0b34b2dceaeff07c844e9fc8b42f27bfe3fbaae2c631bfe80f605f1c7caa821f0387780b37cc1bdb9b6735c20a967ba51e2d6a58a0a0478ee624520ab060402d

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    forki (0.2.1)
+    forki (0.2.5)
       apparition
       capybara
       oj
@@ -29,12 +29,13 @@ GEM
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
       tzinfo (~> 2.0)
-    addressable (2.8.4)
+    addressable (2.8.6)
       public_suffix (>= 2.0.2, < 6.0)
     apparition (0.6.0)
       capybara (~> 3.13, < 4)
       websocket-driver (>= 0.6.5)
     ast (2.4.2)
+    bigdecimal (3.1.5)
     builder (3.2.4)
     byebug (11.1.3)
     capybara (3.39.2)
@@ -53,7 +54,7 @@ GEM
     erubi (1.12.0)
     ethon (0.16.0)
       ffi (>= 1.15.0)
-    ffi (1.15.5)
+    ffi (1.16.3)
     i18n (1.13.0)
       concurrent-ruby (~> 1.0)
     json (2.6.3)
@@ -62,15 +63,16 @@ GEM
       nokogiri (>= 1.12.0)
     matrix (0.4.2)
     method_source (1.0.0)
-    mini_mime (1.1.2)
+    mini_mime (1.1.5)
     minitest (5.18.0)
     nokogiri (1.15.1-arm64-darwin)
       racc (~> 1.4)
-    oj (3.15.1)
+    oj (3.16.3)
+      bigdecimal (>= 3.0)
     parallel (1.23.0)
     parser (3.2.2.1)
       ast (~> 2.4.1)
-    public_suffix (5.0.3)
+    public_suffix (5.0.4)
     racc (1.6.2)
     rack (2.2.4)
     rack-test (2.1.0)
@@ -90,7 +92,7 @@ GEM
     rainbow (3.1.1)
     rake (13.0.6)
     regexp_parser (2.8.0)
-    rexml (3.2.5)
+    rexml (3.2.6)
     rubocop (1.51.0)
       json (~> 2.3)
       parallel (~> 1.10)
@@ -127,17 +129,17 @@ GEM
       rubocop-rails (~> 2.0)
     ruby-progressbar (1.13.0)
     rubyzip (2.3.2)
-    selenium-webdriver (4.11.0)
+    selenium-webdriver (4.16.0)
       rexml (~> 3.2, >= 3.2.5)
       rubyzip (>= 1.2.2, < 3.0)
       websocket (~> 1.0)
     thor (1.2.2)
-    typhoeus (1.4.0)
+    typhoeus (1.4.1)
       ethon (>= 0.9.0)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
     unicode-display_width (2.4.2)
-    websocket (1.2.9)
+    websocket (1.2.10)
     websocket-driver (0.7.6)
       websocket-extensions (>= 0.1.0)
     websocket-extensions (0.1.5)

data/lib/forki/scrapers/post_scraper.rb CHANGED Viewed

@@ -65,14 +65,26 @@ module Forki
       graphql_objects.any? do |graphql_object|  # if any GraphQL objects contain the top-level keys above, return true
         return true unless graphql_object.fetch("image", nil).nil? # so long as the associated values are not nil
         return true unless graphql_object.fetch("currMedia", nil).nil?
+        return true unless graphql_object.fetch("photo_image", nil).nil?
         # This is a complicated form for `web.facebook.com` posts
         if !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil?
           if graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].count.positive?
             return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "all_subattachments", "nodes")&.first&.dig("media", "image", "uri").nil?
+            # Another version I guess
+            return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "media", "large_share_image")&.dig("uri").nil?
           end
         end
+        # Another weird format
+        begin
+          if !graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].empty?
+            return true unless graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"].first.dig("styles", "attachment", "media", "photo_image", "uri").nil?
+          end
+        rescue StandardError
+        end
       end
     end
@@ -157,7 +169,7 @@ module Forki
       graphql_object_array = graphql_strings.map { |graphql_string| JSON.parse(graphql_string) }
       # Once in awhile it's really easy
-      video_objects = graphql_object_array.filter {|go| go.has_key?("video") }
+      video_objects = graphql_object_array.filter { |go| go.has_key?("video") }
       if VideoSieve.can_process_with_sieve?(graphql_object_array)
         # Eventually all of this complexity will be replaced with this
@@ -170,9 +182,15 @@ module Forki
       return extract_video_post_data_alternative(graphql_object_array) if story_node_object.nil?
       if story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("media")
-        video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
-        creation_date = video_object["publish_time"] if video_object&.has_key("publish_time")
-        creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"] if creation_date.nil?
+        media_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]
+        if media_object.has_key?("video")
+          video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["video"]
+        elsif media_object.has_key?("media") && media_object["media"].has_key?("browser_native_sd_url")
+          video_object = media_object["media"]
+        end
+        creation_date = video_object["publish_time"] if video_object&.has_key?("publish_time")
+        creation_date = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["media"]["publish_time"] if creation_date.nil?
       elsif story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"].key?("style_infos")
         # For "Reels" we need a separate way to parse this
         video_object = story_node_object["comet_sections"]["content"]["story"]["attachments"].first["styles"]["attachment"]["style_infos"].first["fb_shorts_story"]["short_form_video_context"]["playback_video"]
@@ -181,13 +199,20 @@ module Forki
         raise "Unable to parse video object" if video_objects.empty?
       end
-      feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
+      begin
+        feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]
+      rescue NoMethodError
+        feedback_object = story_node_object["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]
+      end
       if feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"].key?("cannot_see_top_custom_reactions")
         reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
       else
         reaction_counts = extract_reaction_counts(feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
       end
+      feedback_object = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]
       share_count_object = feedback_object.fetch("share_count", {})
       if story_node_object["comet_sections"]["content"]["story"]["comet_sections"].key? "message"
@@ -202,20 +227,31 @@ module Forki
         else
           num_comments = feedback_object["comment_list_renderer"]["feedback"]["total_comment_count"]
         end
+        view_count = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"]
+        reshare_warning = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
+      elsif feedback_object.has_key?("comments_count_summary_renderer")
+        num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
+        view_count = feedback_object["video_view_count"]
+        reshare_warning = feedback_object["should_show_reshare_warning"]
       else
         if feedback_object["feedback"].key?("comment_count")
           num_comments = feedback_object["feedback"]["comment_count"]["total_count"]
         else
           num_comments = feedback_object["feedback"]["total_comment_count"]
         end
+        view_count = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"]
+        reshare_warning = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
       end
       post_details = {
         id: video_object["id"],
         num_comments: num_comments,
         num_shares: share_count_object.fetch("count", nil),
-        num_views: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["video_view_count"],
-        reshare_warning: feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"],
+        num_views: view_count,
+        reshare_warning: reshare_warning,
         video_preview_image_url: video_object["preferred_thumbnail"]["image"]["uri"],
         video_url: video_object["browser_native_hd_url"] || video_object["browser_native_sd_url"],
         text: text,
@@ -242,9 +278,15 @@ module Forki
       share_count_object = feedback_object.fetch("share_count", {})
+      if feedback_object["comments_count_summary_renderer"]["feedback"].has_key?("comment_rendering_instance")
+        num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
+      else
+        num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"]
+      end
       post_details = {
         id: video_object["id"],
-        num_comments: feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
+        num_comments: num_comments,
         num_shares: share_count_object.fetch("count", nil),
         num_views: feedback_object["video_view_count"],
         reshare_warning: feedback_object["should_show_reshare_warning"],
@@ -265,31 +307,64 @@ module Forki
     # Extracts data from an image post by parsing GraphQL strings as seen in the video post scraper above
     def extract_image_post_data(graphql_object_array)
       # This is a weird one-off style
       graphql_object = graphql_object_array.find { |graphql_object| !graphql_object.dig("node", "comet_sections", "content", "story", "attachments").nil? }
-      unless graphql_object.nil? || graphql_object.count == 0
+      unless graphql_object.nil? || graphql_object.count.zero?
+        # TODO: These two branches are *super* similar, probably a lot of overlap
         attachments = graphql_object["node"]["comet_sections"]["content"]["story"]["attachments"]
-        if graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"].has_key?("cannot_see_top_custom_reactions")
-          reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["cannot_see_top_custom_reactions"]["top_reactions"])
+        if graphql_object["node"]["comet_sections"]["feedback"]["story"].key?("feedback_context")
+          feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
+        elsif graphql_object["node"]["comet_sections"]["feedback"]["story"].has_key?("comet_feed_ufi_container")
+          feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["comet_feed_ufi_container"]["story"]["story_ufi_container"]["story"]["feedback_context"]["feedback_target_with_context"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
+        else
+          feedback_object = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]
+        end
+        if feedback_object.has_key?("cannot_see_top_custom_reactions")
+          reaction_counts = extract_reaction_counts(feedback_object["cannot_see_top_custom_reactions"]["top_reactions"])
         else
-          reaction_counts = extract_reaction_counts(graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["top_reactions"])
+          reaction_counts = extract_reaction_counts(feedback_object["top_reactions"])
         end
         id = graphql_object["node"]["post_id"]
-        num_comments = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["share_count"]["count"]
-        reshare_warning = graphql_object["node"]["comet_sections"]["feedback"]["story"]["feedback_context"]["feedback_target_with_context"]["ufi_renderer"]["feedback"]["comet_ufi_summary_and_actions_renderer"]["feedback"]["should_show_reshare_warning"]
-        image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
+        num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
+        reshare_warning = feedback_object["should_show_reshare_warning"]
+        if attachments.first["styles"]["attachment"].key?("all_subattachments")
+          image_url = attachments.first["styles"]["attachment"]["all_subattachments"]["nodes"].first["media"]["image"]["uri"]
+        else
+          image_url = attachments.first.dig("styles", "attachment", "media", "photo_image", "uri")
+          if image_url.nil?
+            image_url = attachments.first["styles"]["attachment"]["media"]["large_share_image"]["uri"]
+          end
+        end
         text = graphql_object["node"]["comet_sections"]["content"]["story"]["message"]["text"]
         profile_link = graphql_object["node"]["comet_sections"]["content"]["story"]["actors"].first["url"]
-        created_at = graphql_object["node"]["comet_sections"]["content"]["story"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
+        unless graphql_object["node"]["comet_sections"].dig("content", "story", "comet_sections", "context_layout", "story", "comet_sections", "metadata").nil?
+          created_at = graphql_object["node"]["comet_sections"].dig("content", "story", "comet_sections", "context_layout", "story", "comet_sections", "metadata")&.first["story"]["creation_time"]
+        else
+          created_at = graphql_object["node"]["comet_sections"]["context_layout"]["story"]["comet_sections"]["metadata"].first["story"]["creation_time"]
+        end
         has_video = false
       else
         graphql_object_array.find { |graphql_object| graphql_object.key?("viewer_actor") && graphql_object.key?("display_comments") }
         curr_media_object = graphql_object_array.find { |graphql_object| graphql_object.key?("currMedia") }
         creation_story_object = graphql_object_array.find { |graphql_object| graphql_object.key?("creation_story") && graphql_object.key?("message") }
         feedback_object = graphql_object_array.find { |graphql_object| graphql_object.has_key?("comet_ufi_summary_and_actions_renderer") }["comet_ufi_summary_and_actions_renderer"]["feedback"]
+        if feedback_object.key?("top_reactions")
+          feedback_object = feedback_object
+        else
+          # POSSIBLY OUT OF DATE
+          feedback_object = feedback_object["comet_ufi_summary_and_actions_renderer"]["feedback"]
+        end
         share_count_object = feedback_object.fetch("share_count", {})
         poster = creation_story_object["creation_story"]["comet_sections"]["actor_photo"]["story"]["actors"][0]
@@ -301,16 +376,21 @@ module Forki
         end
         id = curr_media_object["currMedia"]["id"],
         num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["total_comment_count"],
-        num_shares = share_count_object.fetch("count", nil),
-        reshare_warning = feedback_object["should_show_reshare_warning"],
-        image_url = curr_media_object["currMedia"]["image"]["uri"],
-        text = (creation_story_object["message"] || {}).fetch("text", nil),
-        profile_link = poster["url"],
-        created_at = curr_media_object["currMedia"]["created_time"],
-        has_video = false
+        if num_comments.nil? && feedback_object.has_key?("comments_count_summary_renderer")
+          num_comments = feedback_object["comments_count_summary_renderer"]["feedback"]["comment_rendering_instance"]["comments"]["total_count"]
+        end
+        num_shares = share_count_object.fetch("count", nil)
+        reshare_warning = feedback_object["should_show_reshare_warning"]
+        image_url = curr_media_object["currMedia"]["image"]["uri"]
+        text = (creation_story_object["message"] || {}).fetch("text", nil)
+        profile_link = poster["url"]
+        created_at = curr_media_object["currMedia"]["created_time"]
+        has_video = false
       end
       post_details = {
         id: id,
         num_comments: num_comments,
@@ -457,4 +537,3 @@ module Forki
 end
 require_relative "sieves/video_sieves/video_sieve"

data/lib/forki/scrapers/sieves/image_sieves/image_sieve.rb.rb CHANGED Viewed

@@ -4,7 +4,6 @@ class ImageSieve
   end
   def self.sieve_for_graphql_objects(graphql_objects)
     sieve = sieve_class_for_graphql_objects(graphql_objects)
     return nil if sieve.nil?
@@ -19,7 +18,6 @@ private
   end
 end
-Dir['./lib/forki/scrapers/sieves/image_sieves/*.rb'].each do |file|
+Dir["./lib/forki/scrapers/sieves/image_sieves/*.rb"].each do |file|
   require file unless file.end_with?("image_sieve.rb")
 end

data/lib/forki/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Forki
-  VERSION = "0.2.5"
+  VERSION = "0.2.7"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: forki
 version: !ruby/object:Gem::Version
-  version: 0.2.5
+  version: 0.2.7
 platform: ruby
 authors:
 - ''
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-12-14 00:00:00.000000000 Z
+date: 2024-05-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: capybara
@@ -165,7 +165,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.4.20
+rubygems_version: 3.5.9
 signing_key:
 specification_version: 4
 summary: A gem to scrape Facebook pages for archive purposes.