RubyGems - zorki - Versions diffs - 0.1.27 → 0.1.28 - Mend

zorki 0.1.27 → 0.1.28

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml +4 -4
data/lib/zorki/scrapers/post_scraper.rb +4 -0
data/lib/zorki/scrapers/scraper.rb +12 -8
data/lib/zorki/scrapers/user_scraper.rb +2 -3
data/lib/zorki/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c5472c0d436e13f2e8554b59051546fed9400ad793de71b9b2d546bb5bd02d08
-  data.tar.gz: d62650105cb0f41a48a93d4379e077a4c1b658e96ae13a30c1d8073f8f2e0546
+  metadata.gz: 0fb9866c1d2efb0e686e6c0edd4f268c452cc18ed2f2481b46cbc1b8f2c02445
+  data.tar.gz: bafdf519a9b2ed1c5fb2f0711ebbf7bf7909e32769290bfe6286a0463056edc7
 SHA512:
-  metadata.gz: 84a98236f4ca36daf440a8aea29acec2fa6963508bae78f5ee7c4d92c2ffedf19ef8db4050deadaa5090ea770132d2a47c64a1bab87f52329bdf18dd31f4aa2e
-  data.tar.gz: e1b635b352163d08dc0ea9b5e74b3cb990a4f9a7d91ce29296ae2150692612c2a7a81fc9e04bfd33cedfd5c4dab7031e5f06a802a25032aed15036550f306328
+  metadata.gz: 13f0bce3dbe9ee6d029f79569a27d287c6679643aa0fcdbc3e176a5667d214664eae046e4f2700aab712f4f3b2e96c5535f3d05c6204fe2856c0101b911be5f6
+  data.tar.gz: 6279ee4bb40c5ad8a6e74be86343027d5b7b122af763274dad96eb3c60d46b30de14acc7f6e57b70b5532888f022b1bcc4db5a8b87d0281471bab519a9faf067

data/lib/zorki/scrapers/post_scraper.rb CHANGED Viewed

@@ -40,6 +40,9 @@ module Zorki
       Capybara.app_host = "https://instagram.com"
       # video slideshows https://www.instagram.com/p/CY7KxwYOFBS/?utm_source=ig_embed&utm_campaign=loading
+      #
+      # TODO: Check if post is available publicly before trying to log in
+      # Should help with the scraping
       login
       graphql_object = get_content_of_subpage_from_url(
         "https://www.instagram.com/p/#{id}/",
@@ -149,6 +152,7 @@ module Zorki
       end
       # Take the screenshot and return it
+      # rubocop:disable Lint/Debugger
       save_screenshot("#{Zorki.temp_storage_location}/instagram_screenshot_#{SecureRandom.uuid}.png")
     end
   end

data/lib/zorki/scrapers/scraper.rb CHANGED Viewed

@@ -70,13 +70,12 @@ module Zorki
       # the one we want, and then moves on.
       response_body = nil
-      responses = []
       page.driver.browser.intercept do |request, &continue|
         # This passes the request forward unmodified, since we only care about the response
         #
         # responses.first.post_data.include?("render_surface%22%3A%22PROFILE")
         continue.call(request) && next unless request.url.include?(subpage_search)
-        continue.call(request) && next unless !post_data_include.nil? && request.post_data.include?(post_data_include)
+        continue.call(request) && next unless !post_data_include.nil? && request.post_data&.include?(post_data_include)
         continue.call(request) do |response|
           puts "***********************************************************"
@@ -113,6 +112,10 @@ module Zorki
         end
       rescue Selenium::WebDriver::Error::WebDriverError
         # Eat them
+      rescue StandardError => e
+        puts "***********************************************************"
+        puts "Error in intercept: #{e}"
+        puts "***********************************************************"
       end
       # Now that the intercept is set up, we visit the page we want
@@ -131,6 +134,7 @@ module Zorki
       # If this is a page that has not been marked as misinfo we can just pull the data
       # TODO: put this before the whole load loop
       if response_body.nil?
         doc = Nokogiri::HTML(page.driver.browser.page_source)
         # elements = doc.search("script").find_all do |e|
         #   e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
@@ -142,13 +146,13 @@ module Zorki
             element_json = OJ.load(element.text)
             # if element.text.include?("jokoy.komi.io")
-              # debugger
-              # if element_json["require"].first.last.first["__bbox"].key?("require")
+            # debugger
+            # if element_json["require"].first.last.first["__bbox"].key?("require")
-              #   element_json["require"].first.last.first["__bbox"]["require"].each do |x|
-              #     debugger if x.to_s.include?("Si mulut pelaut")
-              #   end
-              # end
+            #   element_json["require"].first.last.first["__bbox"]["require"].each do |x|
+            #     debugger if x.to_s.include?("Si mulut pelaut")
+            #   end
+            # end
             # end
             parsed_element_json = element_json["require"].last.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]

data/lib/zorki/scrapers/user_scraper.rb CHANGED Viewed

@@ -30,7 +30,7 @@ module Zorki
           if graphql_script.nil?
             graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "web_profile_info")
           end
-        rescue Zorki::ContentUnavailableError => e
+        rescue Zorki::ContentUnavailableError
           count += 1
           if count > 3
@@ -100,8 +100,7 @@ module Zorki
           profile_image_url: profile_image_url
         }
       end
-    rescue Zorki::ContentUnavailableError => e
-      debugger
+    rescue Zorki::ContentUnavailableError
       raise Zorki::UserScrapingError.new("Zorki could not find user #{username}", additional_data: { username: username })
     end
   end

data/lib/zorki/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Zorki
-  VERSION = "0.1.27"
+  VERSION = "0.1.28"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: zorki
 version: !ruby/object:Gem::Version
-  version: 0.1.27
+  version: 0.1.28
 platform: ruby
 authors:
 - Christopher Guess
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-10-14 00:00:00.000000000 Z
+date: 2024-10-15 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: capybara