zorki 0.1.27 → 0.1.29

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c5472c0d436e13f2e8554b59051546fed9400ad793de71b9b2d546bb5bd02d08
4
- data.tar.gz: d62650105cb0f41a48a93d4379e077a4c1b658e96ae13a30c1d8073f8f2e0546
3
+ metadata.gz: 684dbf1fee604da4f2ab9687223bf3aea6f1ebad0a4f9e16ef7c80dafa645ac0
4
+ data.tar.gz: ab628433652062674b97318864663082cf5c7e623d32ff4ec87494d54df79788
5
5
  SHA512:
6
- metadata.gz: 84a98236f4ca36daf440a8aea29acec2fa6963508bae78f5ee7c4d92c2ffedf19ef8db4050deadaa5090ea770132d2a47c64a1bab87f52329bdf18dd31f4aa2e
7
- data.tar.gz: e1b635b352163d08dc0ea9b5e74b3cb990a4f9a7d91ce29296ae2150692612c2a7a81fc9e04bfd33cedfd5c4dab7031e5f06a802a25032aed15036550f306328
6
+ metadata.gz: cfec123b6b4f1abb61c77c963b4162a9ef87273e3c3bc1dc7a67be34a5af5c38dbc70f4a43d3ae5690a3d5655a54d3081545352eb0768b926d598a575c1c3654
7
+ data.tar.gz: d7cc5599d995dcacc5d38dc60669f67f75d6005f282d9013ceb12bdb463b87ddf78eb9a156bf5e52ad0d3f464f1adea89abc2cc0ae371b4cbcbb981b72124593
@@ -40,6 +40,9 @@ module Zorki
40
40
  Capybara.app_host = "https://instagram.com"
41
41
 
42
42
  # video slideshows https://www.instagram.com/p/CY7KxwYOFBS/?utm_source=ig_embed&utm_campaign=loading
43
+ #
44
+ # TODO: Check if post is available publicly before trying to login
45
+ # Should help with the scraping
43
46
  login
44
47
  graphql_object = get_content_of_subpage_from_url(
45
48
  "https://www.instagram.com/p/#{id}/",
@@ -47,7 +50,6 @@ module Zorki
47
50
  "data,xdt_api__v1__media__shortcode__web_info,items"
48
51
  )
49
52
 
50
-
51
53
  graphql_object = graphql_object.first if graphql_object.kind_of?(Array)
52
54
 
53
55
  # Pages that have been marked as misinfo have a very different structure from pages that have not
@@ -149,6 +151,7 @@ module Zorki
149
151
  end
150
152
 
151
153
  # Take the screenshot and return it
154
+ # rubocop:disable Lint/Debugger
152
155
  save_screenshot("#{Zorki.temp_storage_location}/instagram_screenshot_#{SecureRandom.uuid}.png")
153
156
  end
154
157
  end
@@ -70,13 +70,10 @@ module Zorki
70
70
  # the one we want, and then moves on.
71
71
  response_body = nil
72
72
 
73
- responses = []
74
73
  page.driver.browser.intercept do |request, &continue|
75
74
  # This passes the request forward unmodified, since we only care about the response
76
- #
77
- # responses.first.post_data.include?("render_surface%22%3A%22PROFILE")
78
75
  continue.call(request) && next unless request.url.include?(subpage_search)
79
- continue.call(request) && next unless !post_data_include.nil? && request.post_data.include?(post_data_include)
76
+ continue.call(request) && next unless !post_data_include.nil? && request.post_data&.include?(post_data_include)
80
77
 
81
78
  continue.call(request) do |response|
82
79
  puts "***********************************************************"
@@ -113,6 +110,10 @@ module Zorki
113
110
  end
114
111
  rescue Selenium::WebDriver::Error::WebDriverError
115
112
  # Eat them
113
+ rescue StandardError => e
114
+ puts "***********************************************************"
115
+ puts "Error in intercept: #{e}"
116
+ puts "***********************************************************"
116
117
  end
117
118
 
118
119
  # Now that the intercept is set up, we visit the page we want
@@ -131,6 +132,7 @@ module Zorki
131
132
  # If this is a page that has not been marked as misinfo we can just pull the data
132
133
  # TODO: put this before the whole load loop
133
134
  if response_body.nil?
135
+
134
136
  doc = Nokogiri::HTML(page.driver.browser.page_source)
135
137
  # elements = doc.search("script").find_all do |e|
136
138
  # e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
@@ -142,13 +144,13 @@ module Zorki
142
144
  element_json = OJ.load(element.text)
143
145
 
144
146
  # if element.text.include?("jokoy.komi.io")
145
- # debugger
146
- # if element_json["require"].first.last.first["__bbox"].key?("require")
147
+ # debugger
148
+ # if element_json["require"].first.last.first["__bbox"].key?("require")
147
149
 
148
- # element_json["require"].first.last.first["__bbox"]["require"].each do |x|
149
- # debugger if x.to_s.include?("Si mulut pelaut")
150
- # end
151
- # end
150
+ # element_json["require"].first.last.first["__bbox"]["require"].each do |x|
151
+ # debugger if x.to_s.include?("Si mulut pelaut")
152
+ # end
153
+ # end
152
154
  # end
153
155
 
154
156
  parsed_element_json = element_json["require"].last.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
@@ -24,13 +24,15 @@ module Zorki
24
24
  begin
25
25
  login
26
26
 
27
- graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql/query", "data,user,media_count", post_data_include: "render_surface")
27
+ # This searches for a specific request; the search string looks odd because it is URI-encoded
28
+ # graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql/query", "data,user,media_count", post_data_include: "render_surface%22%3A%22PROFILE")
29
+ graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql/query", nil, post_data_include: "render_surface%22%3A%22PROFILE")
28
30
  graphql_script = graphql_script.first if graphql_script.class == Array
29
31
 
30
32
  if graphql_script.nil?
31
33
  graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "web_profile_info")
32
34
  end
33
- rescue Zorki::ContentUnavailableError => e
35
+ rescue Zorki::ContentUnavailableError
34
36
  count += 1
35
37
 
36
38
  if count > 3
@@ -100,8 +102,7 @@ module Zorki
100
102
  profile_image_url: profile_image_url
101
103
  }
102
104
  end
103
- rescue Zorki::ContentUnavailableError => e
104
- debugger
105
+ rescue Zorki::ContentUnavailableError
105
106
  raise Zorki::UserScrapingError.new("Zorki could not find user #{username}", additional_data: { username: username })
106
107
  end
107
108
  end
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.27"
4
+ VERSION = "0.1.29"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.27
4
+ version: 0.1.29
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-10-14 00:00:00.000000000 Z
11
+ date: 2024-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara