zorki 0.1.7 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -3
- data/lib/zorki/scrapers/post_scraper.rb +0 -1
- data/lib/zorki/scrapers/scraper.rb +24 -10
- data/lib/zorki/scrapers/user_scraper.rb +4 -0
- data/lib/zorki/version.rb +1 -1
- data/lib/zorki.rb +9 -2
- data/zorki.gemspec +2 -2
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1eadd7ea03af064623b2e0a3deebc32bfa424ac60aab4713b61e3db94c10ee66
|
4
|
+
data.tar.gz: c4cd5d7bb8148eb23fb7d5b80f08f9118a9b39f717ca395a38b1abca34a7ce70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dcfc9a4129cf62063c34461cfb1ee119ee702a64ed892b10909ddacb4cae544e7875eed7a53355db60c67f590292be951cd515d516b75456aae281a29ea3cbc5
|
7
|
+
data.tar.gz: feb1ca4d372217487c02311241ae9c4379eacc82336cc91e48c7f80ba68bef3538a14b0f17fb77e254d68203c77d4dcba9559b832c6dd9eda4879426a0e19c83
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
zorki (0.1.
|
4
|
+
zorki (0.1.8)
|
5
5
|
apparition
|
6
6
|
capybara
|
7
7
|
oj
|
@@ -59,6 +59,9 @@ GEM
|
|
59
59
|
connection_pool (2.4.1)
|
60
60
|
crass (1.0.6)
|
61
61
|
curb (1.0.5)
|
62
|
+
debug (1.8.0)
|
63
|
+
irb (>= 1.5.0)
|
64
|
+
reline (>= 0.3.1)
|
62
65
|
dotenv (2.7.6)
|
63
66
|
drb (2.1.1)
|
64
67
|
ruby2_keywords
|
@@ -162,9 +165,9 @@ GEM
|
|
162
165
|
ruby-progressbar (1.13.0)
|
163
166
|
ruby2_keywords (0.0.5)
|
164
167
|
rubyzip (2.3.2)
|
165
|
-
selenium-devtools (0.
|
168
|
+
selenium-devtools (0.120.0)
|
166
169
|
selenium-webdriver (~> 4.2)
|
167
|
-
selenium-webdriver (4.
|
170
|
+
selenium-webdriver (4.16.0)
|
168
171
|
rexml (~> 3.2, >= 3.2.5)
|
169
172
|
rubyzip (>= 1.2.2, < 3.0)
|
170
173
|
websocket (~> 1.0)
|
@@ -189,6 +192,7 @@ PLATFORMS
|
|
189
192
|
|
190
193
|
DEPENDENCIES
|
191
194
|
curb (~> 1.0, >= 1.0.5)
|
195
|
+
debug
|
192
196
|
dotenv (~> 2.7.6)
|
193
197
|
minitest (~> 5.0)
|
194
198
|
rack (= 3.0.8)
|
data/lib/zorki/scrapers/scraper.rb
CHANGED
@@ -114,27 +114,32 @@ module Zorki
|
|
114
114
|
# e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
|
115
115
|
# end
|
116
116
|
|
117
|
-
elements = doc.search("script").
|
118
|
-
|
117
|
+
elements = doc.search("script").filter_map do |element|
|
118
|
+
parsed_element_json = nil
|
119
119
|
begin
|
120
|
-
element_json = JSON.parse(element)
|
120
|
+
element_json = JSON.parse(element.text)
|
121
121
|
|
122
|
-
|
123
|
-
rescue StandardError
|
122
|
+
parsed_element_json = element_json["require"].first.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
|
123
|
+
rescue StandardError
|
124
124
|
next
|
125
125
|
end
|
126
126
|
|
127
|
-
|
128
|
-
end
|
127
|
+
parsed_element_json
|
128
|
+
end
|
129
129
|
|
130
130
|
if elements&.empty?
|
131
|
-
raise ContentUnavailableError
|
131
|
+
raise ContentUnavailableError.new("Cannot find anything", additional_data: { page_source: page.driver.browser.page_source, elements: elements })
|
132
132
|
end
|
133
|
+
|
133
134
|
return elements
|
134
135
|
end
|
135
136
|
|
136
|
-
raise ContentUnavailableError if response_body.nil?
|
137
|
+
raise ContentUnavailableError.new("Response body nil") if response_body.nil?
|
137
138
|
Oj.load(response_body)
|
139
|
+
ensure
|
140
|
+
page.quit
|
141
|
+
# TRY THIS TO MAKE SURE CHROME GETS CLOSED?
|
142
|
+
# We may also want to not do this and make sure the same browser is reused instead for cookie purposes
|
138
143
|
end
|
139
144
|
|
140
145
|
private
|
@@ -167,7 +172,7 @@ module Zorki
|
|
167
172
|
|
168
173
|
def login
|
169
174
|
# Reset the sessions so that there's nothing laying around
|
170
|
-
page.
|
175
|
+
# page.driver.browser.close
|
171
176
|
|
172
177
|
# Check if we're on an Instagram page already, if not visit it.
|
173
178
|
unless page.driver.browser.current_url.include? "instagram.com"
|
@@ -242,6 +247,15 @@ module Zorki
|
|
242
247
|
# Multiply everything and ensure we get an integer back
|
243
248
|
(number * multiplier).to_i
|
244
249
|
end
|
250
|
+
|
251
|
+
# def reset_window
|
252
|
+
# old_handle = page.driver.browser.window_handle
|
253
|
+
# page.driver.browser.switch_to.new_window(:window)
|
254
|
+
# new_handle = page.driver.browser.window_handle
|
255
|
+
# page.driver.browser.switch_to.window(old_handle)
|
256
|
+
# page.driver.browser.close
|
257
|
+
# page.driver.browser.switch_to.window(new_handle)
|
258
|
+
# end
|
245
259
|
end
|
246
260
|
end
|
247
261
|
|
data/lib/zorki/scrapers/user_scraper.rb
CHANGED
@@ -21,6 +21,10 @@ module Zorki
|
|
21
21
|
graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "?username=")
|
22
22
|
graphql_script = graphql_script.first if graphql_script.class == Array
|
23
23
|
|
24
|
+
if graphql_script.nil?
|
25
|
+
graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "web_profile_info")
|
26
|
+
end
|
27
|
+
|
24
28
|
if graphql_script.has_key?("author") && !graphql_script["author"].nil?
|
25
29
|
user = graphql_script["author"]
|
26
30
|
|
data/lib/zorki/version.rb
CHANGED
data/lib/zorki.rb
CHANGED
@@ -20,8 +20,15 @@ module Zorki
|
|
20
20
|
end
|
21
21
|
|
22
22
|
class ContentUnavailableError < Error
|
23
|
-
|
24
|
-
|
23
|
+
attr_reader :additional_data
|
24
|
+
|
25
|
+
def initialize(msg = "Zorki could not find content requested", additional_data: {})
|
26
|
+
super(msg)
|
27
|
+
@additional_data = additional_data
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_honeybadger_context
|
31
|
+
additional_data
|
25
32
|
end
|
26
33
|
end
|
27
34
|
|
data/zorki.gemspec
CHANGED
@@ -29,8 +29,6 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
30
30
|
spec.require_paths = ["lib"]
|
31
31
|
|
32
|
-
# Uncomment to register a new dependency of your gem
|
33
|
-
# spec.add_dependency "example-gem", "~> 1.0"
|
34
32
|
spec.add_dependency "capybara" # For scraping and running browsers
|
35
33
|
spec.add_dependency "apparition" # A Chrome driver for Capybara
|
36
34
|
spec.add_dependency "typhoeus" # For making API requests
|
@@ -38,6 +36,8 @@ Gem::Specification.new do |spec|
|
|
38
36
|
spec.add_dependency "selenium-webdriver" # Webdriver selenium
|
39
37
|
spec.add_dependency "selenium-devtools" # Allow us to intercept requests
|
40
38
|
|
39
|
+
spec.add_development_dependency "debug"
|
40
|
+
|
41
41
|
# For more information and examples about making a new gem, checkout our
|
42
42
|
# guide at: https://bundler.io/guides/creating_gem.html
|
43
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zorki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christopher Guess
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: debug
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
description:
|
98
112
|
email:
|
99
113
|
- cguess@gmail.com
|