zorki 0.1.29 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/zorki/scrapers/scraper.rb +10 -7
- data/lib/zorki/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 360620c86ba91caf49a0e41a0362654620f60da706c761f3f9429757f570c7a4
|
4
|
+
data.tar.gz: 442628f21ce102c28e22f2aa0cc9243fe1c836267a6901cd53c5f094fe778444
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bb9d411327c46d63229dfc8e43cfcfb43d14a4b6a87615d07aa7871a29a9d0fe7390ef018134eb358314300fa1a67c0e976a43fd0357e7eea0efac4b4afede30
|
7
|
+
data.tar.gz: 5ef69f54c80920ebfacf7520c851b06649b81262df809041bb6e97a981e8fd237ccea5b7d8972b4332e2a78fe25330171af9d0f62c9dfc2ccf2029cd0409044a
|
@@ -81,7 +81,6 @@ module Zorki
|
|
81
81
|
puts response.body
|
82
82
|
puts "***********************************************************"
|
83
83
|
|
84
|
-
# responses << response
|
85
84
|
# Check if not a CORS prefetch and finish up if not
|
86
85
|
if !response.body&.empty? && response.body
|
87
86
|
check_passed = true
|
@@ -103,6 +102,7 @@ module Zorki
|
|
103
102
|
puts "checking FAILED request: #{request.url}"
|
104
103
|
puts response.body
|
105
104
|
puts "***********************************************************"
|
105
|
+
next
|
106
106
|
end
|
107
107
|
|
108
108
|
response_body = response.body if check_passed == true
|
@@ -110,10 +110,10 @@ module Zorki
|
|
110
110
|
end
|
111
111
|
rescue Selenium::WebDriver::Error::WebDriverError
|
112
112
|
# Eat them
|
113
|
-
|
114
|
-
puts "***********************************************************"
|
115
|
-
puts "Error in intercept: #{e}"
|
116
|
-
puts "***********************************************************"
|
113
|
+
# rescue StandardError => e
|
114
|
+
# puts "***********************************************************"
|
115
|
+
# puts "Error in intercept: #{e}"
|
116
|
+
# puts "***********************************************************"
|
117
117
|
end
|
118
118
|
|
119
119
|
# Now that the intercept is set up, we visit the page we want
|
@@ -132,7 +132,6 @@ module Zorki
|
|
132
132
|
# If this is a page that has not been marked as misinfo we can just pull the data
|
133
133
|
# TODO: put this before the whole load loop
|
134
134
|
if response_body.nil?
|
135
|
-
|
136
135
|
doc = Nokogiri::HTML(page.driver.browser.page_source)
|
137
136
|
# elements = doc.search("script").find_all do |e|
|
138
137
|
# e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
|
@@ -141,7 +140,7 @@ module Zorki
|
|
141
140
|
elements = doc.search("script").filter_map do |element|
|
142
141
|
parsed_element_json = nil
|
143
142
|
begin
|
144
|
-
element_json =
|
143
|
+
element_json = Oj.load(element.text)
|
145
144
|
|
146
145
|
# if element.text.include?("jokoy.komi.io")
|
147
146
|
# debugger
|
@@ -155,6 +154,10 @@ module Zorki
|
|
155
154
|
|
156
155
|
parsed_element_json = element_json["require"].last.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
|
157
156
|
rescue StandardError
|
157
|
+
# puts "***********************************************************"
|
158
|
+
# puts "Error in parsing JSON: #{e}"
|
159
|
+
# puts e.backtrace
|
160
|
+
# puts "***********************************************************"
|
158
161
|
next
|
159
162
|
end
|
160
163
|
|
data/lib/zorki/version.rb
CHANGED