zorki 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e9aca1027df18f607fac0616a75a4b8ac11728cbac5de9205f014c57306f82e
4
- data.tar.gz: c22375fd87090060642780ae2ec472d505f66c7ed1579d35d8eb0e9fe02bd976
3
+ metadata.gz: 6dd3c28132011c0d9d42875803face311af01a50bbbfa9cf4f07ca89a63029d4
4
+ data.tar.gz: 91fa19abbd41551e4cb55ff34ce3977795903312c06506bf9bb450796cca7189
5
5
  SHA512:
6
- metadata.gz: b3e3117ca0903a23fd2303ec938d70d1d8dd8f82eeb510019832e25e286a4c5ae665c707cc62523dedf9617de9239072aa02d8877fb8e2fae2ea4f85ad14bbc9
7
- data.tar.gz: 7a2b9d079041484f5553bf5be6fa391bbeb20c8660fb07df69cd7b6feb28d8f7d315e22353d89ff1ccedf7304e39dfbab9f8f18e9f25ad44cd7067e473d515d1
6
+ metadata.gz: '087a14d77466f9b5f70014f5f09ae97542d2b8083499917d0397815dd4c298da07da9c4b28fb31e6f7c1949a356e9c4d32b8bb8e7551054ca1fe5c37c972515b'
7
+ data.tar.gz: 5b46e4e7edb229d89ff5f9bb740dac2959c314e09cf5b9a770c8b16779509266a8bd06d38b1a1208f547cfef22e10fd139b0f32dbea9c89dba5413874399743a
@@ -23,6 +23,8 @@ module Zorki
23
23
  "data,xdt_api__v1__media__shortcode__web_info,items"
24
24
  )
25
25
 
26
+ graphql_object = graphql_object.first if graphql_object.kind_of?(Array)
27
+
26
28
  # For pages that have been marked misinfo the structure is very different than not
27
29
  # If it is a clean post then it's just a schema.org thing, but if it's misinfo it's the old
28
30
  # way of deeply nested stuff.
@@ -54,7 +56,8 @@ module Zorki
54
56
  else
55
57
  # We need to see if this is a single image post or a slideshow. We do that
56
58
  # by looking for a single image, if it's not there, we assume the alternative.
57
- graphql_object = graphql_object["data"]["xdt_api__v1__media__shortcode__web_info"]
59
+ # debugger
60
+ # graphql_object = graphql_object["data"]["xdt_api__v1__media__shortcode__web_info"]
58
61
 
59
62
  unless graphql_object["items"][0].has_key?("video_versions") && !graphql_object["items"][0]["video_versions"].nil?
60
63
  # Check if there is a slideshow or not
@@ -5,9 +5,9 @@ require "dotenv/load"
5
5
  require "oj"
6
6
  require "selenium-webdriver"
7
7
  require "logger"
8
- require "debug"
9
8
  require "securerandom"
10
9
  require "selenium/webdriver/remote/http/curb"
10
+ require "debug"
11
11
 
12
12
  # 2022-06-07 14:15:23 WARN Selenium [DEPRECATION] [:browser_options] :options as a parameter for driver initialization is deprecated. Use :capabilities with an Array of value capabilities/options if necessary instead.
13
13
 
@@ -112,12 +112,27 @@ module Zorki
112
112
  # TODO: put this before the whole load loop
113
113
  if response_body.nil?
114
114
  doc = Nokogiri::HTML(page.driver.browser.page_source)
115
- elements = doc.search("script").find_all do |e|
116
- e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
117
- end
115
+ # elements = doc.search("script").find_all do |e|
116
+ # e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
117
+ # end
118
+
119
+ elements = doc.search("script").map do |element|
120
+ element_json = nil
121
+ begin
122
+ element_json = JSON.parse(element)
123
+
124
+ element_json = element_json["require"].first.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
125
+ rescue StandardError => e
126
+ next
127
+ end
128
+
129
+ element_json
130
+ end.compact
118
131
 
119
- raise ContentUnavailableError if elements&.empty?
120
- return Oj.load(elements.first.text)
132
+ if elements&.empty?
133
+ raise ContentUnavailableError
134
+ end
135
+ return elements
121
136
  end
122
137
 
123
138
  raise ContentUnavailableError if response_body.nil?
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.3"
4
+ VERSION = "0.1.4"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-12 00:00:00.000000000 Z
11
+ date: 2023-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara