zorki 0.1.27 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c5472c0d436e13f2e8554b59051546fed9400ad793de71b9b2d546bb5bd02d08
4
- data.tar.gz: d62650105cb0f41a48a93d4379e077a4c1b658e96ae13a30c1d8073f8f2e0546
3
+ metadata.gz: 0fb9866c1d2efb0e686e6c0edd4f268c452cc18ed2f2481b46cbc1b8f2c02445
4
+ data.tar.gz: bafdf519a9b2ed1c5fb2f0711ebbf7bf7909e32769290bfe6286a0463056edc7
5
5
  SHA512:
6
- metadata.gz: 84a98236f4ca36daf440a8aea29acec2fa6963508bae78f5ee7c4d92c2ffedf19ef8db4050deadaa5090ea770132d2a47c64a1bab87f52329bdf18dd31f4aa2e
7
- data.tar.gz: e1b635b352163d08dc0ea9b5e74b3cb990a4f9a7d91ce29296ae2150692612c2a7a81fc9e04bfd33cedfd5c4dab7031e5f06a802a25032aed15036550f306328
6
+ metadata.gz: 13f0bce3dbe9ee6d029f79569a27d287c6679643aa0fcdbc3e176a5667d214664eae046e4f2700aab712f4f3b2e96c5535f3d05c6204fe2856c0101b911be5f6
7
+ data.tar.gz: 6279ee4bb40c5ad8a6e74be86343027d5b7b122af763274dad96eb3c60d46b30de14acc7f6e57b70b5532888f022b1bcc4db5a8b87d0281471bab519a9faf067
@@ -40,6 +40,9 @@ module Zorki
40
40
  Capybara.app_host = "https://instagram.com"
41
41
 
42
42
  # video slideshows https://www.instagram.com/p/CY7KxwYOFBS/?utm_source=ig_embed&utm_campaign=loading
43
+ #
44
+ # TODO: Check if post is available publicly before trying to log in
45
+ # Should help with the scraping
43
46
  login
44
47
  graphql_object = get_content_of_subpage_from_url(
45
48
  "https://www.instagram.com/p/#{id}/",
@@ -149,6 +152,7 @@ module Zorki
149
152
  end
150
153
 
151
154
  # Take the screenshot and return it
155
+ # rubocop:disable Lint/Debugger
152
156
  save_screenshot("#{Zorki.temp_storage_location}/instagram_screenshot_#{SecureRandom.uuid}.png")
153
157
  end
154
158
  end
@@ -70,13 +70,12 @@ module Zorki
70
70
  # the one we want, and then moves on.
71
71
  response_body = nil
72
72
 
73
- responses = []
74
73
  page.driver.browser.intercept do |request, &continue|
75
74
  # This passes the request forward unmodified, since we only care about the response
76
75
  #
77
76
  # responses.first.post_data.include?("render_surface%22%3A%22PROFILE")
78
77
  continue.call(request) && next unless request.url.include?(subpage_search)
79
- continue.call(request) && next unless !post_data_include.nil? && request.post_data.include?(post_data_include)
78
+ continue.call(request) && next unless !post_data_include.nil? && request.post_data&.include?(post_data_include)
80
79
 
81
80
  continue.call(request) do |response|
82
81
  puts "***********************************************************"
@@ -113,6 +112,10 @@ module Zorki
113
112
  end
114
113
  rescue Selenium::WebDriver::Error::WebDriverError
115
114
  # Eat them
115
+ rescue StandardError => e
116
+ puts "***********************************************************"
117
+ puts "Error in intercept: #{e}"
118
+ puts "***********************************************************"
116
119
  end
117
120
 
118
121
  # Now that the intercept is set up, we visit the page we want
@@ -131,6 +134,7 @@ module Zorki
131
134
  # If this is a page that has not been marked as misinfo we can just pull the data
132
135
  # TODO: put this before the whole load loop
133
136
  if response_body.nil?
137
+
134
138
  doc = Nokogiri::HTML(page.driver.browser.page_source)
135
139
  # elements = doc.search("script").find_all do |e|
136
140
  # e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
@@ -142,13 +146,13 @@ module Zorki
142
146
  element_json = OJ.load(element.text)
143
147
 
144
148
  # if element.text.include?("jokoy.komi.io")
145
- # debugger
146
- # if element_json["require"].first.last.first["__bbox"].key?("require")
149
+ # debugger
150
+ # if element_json["require"].first.last.first["__bbox"].key?("require")
147
151
 
148
- # element_json["require"].first.last.first["__bbox"]["require"].each do |x|
149
- # debugger if x.to_s.include?("Si mulut pelaut")
150
- # end
151
- # end
152
+ # element_json["require"].first.last.first["__bbox"]["require"].each do |x|
153
+ # debugger if x.to_s.include?("Si mulut pelaut")
154
+ # end
155
+ # end
152
156
  # end
153
157
 
154
158
  parsed_element_json = element_json["require"].last.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
@@ -30,7 +30,7 @@ module Zorki
30
30
  if graphql_script.nil?
31
31
  graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "web_profile_info")
32
32
  end
33
- rescue Zorki::ContentUnavailableError => e
33
+ rescue Zorki::ContentUnavailableError
34
34
  count += 1
35
35
 
36
36
  if count > 3
@@ -100,8 +100,7 @@ module Zorki
100
100
  profile_image_url: profile_image_url
101
101
  }
102
102
  end
103
- rescue Zorki::ContentUnavailableError => e
104
- debugger
103
+ rescue Zorki::ContentUnavailableError
105
104
  raise Zorki::UserScrapingError.new("Zorki could not find user #{username}", additional_data: { username: username })
106
105
  end
107
106
  end
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.27"
4
+ VERSION = "0.1.28"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.27
4
+ version: 0.1.28
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-10-14 00:00:00.000000000 Z
11
+ date: 2024-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara