zorki 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5d6f47b685aa9f49c602bc0082e7ac9084e0cb444acfa6a7989c5d5cd3d572e7
4
- data.tar.gz: 858f01c64e0efa666941222d0fade285f0c49843957cdbc0adc0dbe46e7c7ec0
3
+ metadata.gz: 19f084d94393dd8e69f559d7f9bc84385d80b1ac2730c0e957f031d5e67da38f
4
+ data.tar.gz: f15fbb5c622bcd2940a1adda2748178d1314ac579e31b94d859979c6e507df76
5
5
  SHA512:
6
- metadata.gz: 73a9ac083a40f2e7c8b03a315bd50b83545507ed9e1700d944fa8e1e0e781ce8cad6ff5a17e277d83bd1788cd7444774c5ae530be8c3205213c7dfcbfb27c9c4
7
- data.tar.gz: 201bc5e5eab638249bd4fb1e5dd859ed308e1e6fdbc4438d709144aa519bcef393404a8206929636944f65217af7e8597a6b8589f1a86f16ea3c6513e5806487
6
+ metadata.gz: ea049add265fd88524d894995f5cc3eabdf98c6b9610969ff7344dc86ce2e679ac0f2d6a5e9f606b2111806789d6a783c23dff9596f8a7dc66be8543eb8a3453
7
+ data.tar.gz: e6d9760c423d59280cee47262d2d92e50312c0b858ee9de2dbf4aafa32ee1cb5d7634b3a55ff2b36b5fadb2be8cc52f3b63190239a61c2f1b03116bd3a83c75f
@@ -171,7 +171,7 @@ module Zorki
171
171
  end
172
172
  elsif object.has_key?("display_resources")
173
173
  if object["is_video"] == true
174
- video = Zorki.retrieve_media(object["display_resources"].last["src"])
174
+ video = Zorki.retrieve_media(object["video_url"])
175
175
  video_preview_image = Zorki.retrieve_media(object["display_url"])
176
176
  else
177
177
  images << Zorki.retrieve_media(object["display_resources"].last["src"])
@@ -57,7 +57,7 @@ module Zorki
57
57
  # same type of search there as we use for users and simplify this whole thing a lot.
58
58
  #
59
59
  # @returns Hash a ruby hash of the JSON data
60
- def get_content_of_subpage_from_url(url, subpage_search, additional_search_parameters = nil, post_data_include: nil)
60
+ def get_content_of_subpage_from_url(url, subpage_search, additional_search_parameters = nil, post_data_include: nil, header: nil)
61
61
  # So this is fun:
62
62
  # For pages marked as misinformation we have to use one method (interception of request) and
63
63
  # for pages that are not, we can just pull the data straight from the page.
@@ -73,25 +73,22 @@ module Zorki
73
73
  page.driver.browser.intercept do |request, &continue|
74
74
  # This passes the request forward unmodified, since we only care about the response
75
75
  continue.call(request) && next unless request.url.include?(subpage_search)
76
- continue.call(request) && next unless !post_data_include.nil? && request.post_data&.include?(post_data_include)
76
+ if !header.nil?
77
+ header_key = header.keys.first.to_s
78
+ header_value = header.values.first
79
+
80
+ puts "Request Header included? #{request.headers.include?(header_key)} #{request.headers[header_key]} == #{header_value}"
81
+ continue.call(request) && next unless request.headers.include?(header_key) && request.headers[header_key] == header_value
82
+
83
+ elsif !post_data_include.nil?
84
+ continue.call(request) && next unless request.post_data&.include?(post_data_include)
85
+ end
77
86
 
78
87
  continue.call(request) do |response|
79
88
  # Check if not a CORS prefetch and finish up if not
80
89
  if !response.body&.empty? && response.body
81
90
  check_passed = true
82
91
 
83
- if !additional_search_parameters.nil? && post_data_include.nil?
84
- body_to_check = Oj.load(response.body)
85
-
86
- search_parameters = additional_search_parameters.split(",")
87
- search_parameters.each_with_index do |key, index|
88
- break if body_to_check.nil?
89
-
90
- check_passed = false unless body_to_check.has_key?(key)
91
- body_to_check = body_to_check[key]
92
- end
93
- end
94
-
95
92
  next if check_passed == false
96
93
  response_body = response.body if check_passed == true
97
94
  end
@@ -108,42 +105,17 @@ module Zorki
108
105
  page.driver.browser.navigate.to(url)
109
106
  # We wait until the correct intercept is processed or we've waited 60 seconds
110
107
  start_time = Time.now
111
- # puts "Waiting.... #{url}"
112
-
113
- sleep(rand(1...10))
114
108
  while response_body.nil? && (Time.now - start_time) < 60
115
109
  sleep(0.1)
116
110
  end
117
111
 
118
112
  page.driver.execute_script("window.stop();")
119
113
 
120
- # If this is a page that has not been marked as misinfo we can just pull the data
121
- # TODO: put this before the whole load loop
122
- if response_body.nil?
123
- doc = Nokogiri::HTML(page.driver.browser.page_source)
124
- # elements = doc.search("script").find_all do |e|
125
- # e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
126
- # end
127
-
128
- elements = doc.search("script").filter_map do |element|
129
- parsed_element_json = nil
130
- begin
131
- element_json = Oj.load(element.text)
132
- parsed_element_json = element_json["require"].last.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
133
- rescue StandardError
134
- next
135
- end
136
-
137
- parsed_element_json
138
- end
139
-
140
- if elements&.empty?
141
- raise ContentUnavailableError.new("Cannot find anything", additional_data: { page_source: page.driver.browser.page_source, elements: elements })
142
- end
143
-
144
- return elements
145
- end
114
+ # 1. Fix the ability to detect if a page is removed -DONE
115
+ # 2. Fix videos for slideshows - Works for reels?
116
+ # 3. Public links
146
117
 
118
+ # Check if something failed before we continue. Use the fake test to test
147
119
  raise ContentUnavailableError.new("Response body nil") if response_body.nil?
148
120
  Oj.load(response_body)
149
121
  ensure
@@ -26,7 +26,7 @@ module Zorki
26
26
 
27
27
  # This is searching for a specific request, the reason it's weird is because it's uri encoded
28
28
  # graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql/query", "data,user,media_count", post_data_include: "render_surface%22%3A%22PROFILE")
29
- graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql/query", nil, post_data_include: "render_surface%22%3A%22PROFILE")
29
+ graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql/query", nil, post_data_include: "render_surface%22%3A%22PROFILE", header: { "X-FB-Friendly-Name": "PolarisProfilePageContentQuery" })
30
30
  graphql_script = graphql_script.first if graphql_script.class == Array
31
31
 
32
32
  if graphql_script.nil?
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.2.5"
4
+ VERSION = "0.2.6"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-10-17 00:00:00.000000000 Z
11
+ date: 2024-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara