zorki 0.1.29 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/zorki/scrapers/scraper.rb +10 -7
- data/lib/zorki/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 360620c86ba91caf49a0e41a0362654620f60da706c761f3f9429757f570c7a4
|
4
|
+
data.tar.gz: 442628f21ce102c28e22f2aa0cc9243fe1c836267a6901cd53c5f094fe778444
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bb9d411327c46d63229dfc8e43cfcfb43d14a4b6a87615d07aa7871a29a9d0fe7390ef018134eb358314300fa1a67c0e976a43fd0357e7eea0efac4b4afede30
|
7
|
+
data.tar.gz: 5ef69f54c80920ebfacf7520c851b06649b81262df809041bb6e97a981e8fd237ccea5b7d8972b4332e2a78fe25330171af9d0f62c9dfc2ccf2029cd0409044a
|
@@ -81,7 +81,6 @@ module Zorki
|
|
81
81
|
puts response.body
|
82
82
|
puts "***********************************************************"
|
83
83
|
|
84
|
-
# responses << response
|
85
84
|
# Check if not a CORS prefetch and finish up if not
|
86
85
|
if !response.body&.empty? && response.body
|
87
86
|
check_passed = true
|
@@ -103,6 +102,7 @@ module Zorki
|
|
103
102
|
puts "checking FAILED request: #{request.url}"
|
104
103
|
puts response.body
|
105
104
|
puts "***********************************************************"
|
105
|
+
next
|
106
106
|
end
|
107
107
|
|
108
108
|
response_body = response.body if check_passed == true
|
@@ -110,10 +110,10 @@ module Zorki
|
|
110
110
|
end
|
111
111
|
rescue Selenium::WebDriver::Error::WebDriverError
|
112
112
|
# Eat them
|
113
|
-
|
114
|
-
puts "***********************************************************"
|
115
|
-
puts "Error in intercept: #{e}"
|
116
|
-
puts "***********************************************************"
|
113
|
+
# rescue StandardError => e
|
114
|
+
# puts "***********************************************************"
|
115
|
+
# puts "Error in intercept: #{e}"
|
116
|
+
# puts "***********************************************************"
|
117
117
|
end
|
118
118
|
|
119
119
|
# Now that the intercept is set up, we visit the page we want
|
@@ -132,7 +132,6 @@ module Zorki
|
|
132
132
|
# If this is a page that has not been marked as misinfo we can just pull the data
|
133
133
|
# TODO: put this before the whole load loop
|
134
134
|
if response_body.nil?
|
135
|
-
|
136
135
|
doc = Nokogiri::HTML(page.driver.browser.page_source)
|
137
136
|
# elements = doc.search("script").find_all do |e|
|
138
137
|
# e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
|
@@ -141,7 +140,7 @@ module Zorki
|
|
141
140
|
elements = doc.search("script").filter_map do |element|
|
142
141
|
parsed_element_json = nil
|
143
142
|
begin
|
144
|
-
element_json =
|
143
|
+
element_json = Oj.load(element.text)
|
145
144
|
|
146
145
|
# if element.text.include?("jokoy.komi.io")
|
147
146
|
# debugger
|
@@ -155,6 +154,10 @@ module Zorki
|
|
155
154
|
|
156
155
|
parsed_element_json = element_json["require"].last.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
|
157
156
|
rescue StandardError
|
157
|
+
# puts "***********************************************************"
|
158
|
+
# puts "Error in parsing JSON: #{e}"
|
159
|
+
# puts e.backtrace
|
160
|
+
# puts "***********************************************************"
|
158
161
|
next
|
159
162
|
end
|
160
163
|
|
data/lib/zorki/version.rb
CHANGED