zorki 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -3
- data/lib/zorki/scrapers/post_scraper.rb +0 -1
- data/lib/zorki/scrapers/scraper.rb +24 -10
- data/lib/zorki/scrapers/user_scraper.rb +4 -0
- data/lib/zorki/version.rb +1 -1
- data/lib/zorki.rb +9 -2
- data/zorki.gemspec +2 -2
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1eadd7ea03af064623b2e0a3deebc32bfa424ac60aab4713b61e3db94c10ee66
|
4
|
+
data.tar.gz: c4cd5d7bb8148eb23fb7d5b80f08f9118a9b39f717ca395a38b1abca34a7ce70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dcfc9a4129cf62063c34461cfb1ee119ee702a64ed892b10909ddacb4cae544e7875eed7a53355db60c67f590292be951cd515d516b75456aae281a29ea3cbc5
|
7
|
+
data.tar.gz: feb1ca4d372217487c02311241ae9c4379eacc82336cc91e48c7f80ba68bef3538a14b0f17fb77e254d68203c77d4dcba9559b832c6dd9eda4879426a0e19c83
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
zorki (0.1.
|
4
|
+
zorki (0.1.8)
|
5
5
|
apparition
|
6
6
|
capybara
|
7
7
|
oj
|
@@ -59,6 +59,9 @@ GEM
|
|
59
59
|
connection_pool (2.4.1)
|
60
60
|
crass (1.0.6)
|
61
61
|
curb (1.0.5)
|
62
|
+
debug (1.8.0)
|
63
|
+
irb (>= 1.5.0)
|
64
|
+
reline (>= 0.3.1)
|
62
65
|
dotenv (2.7.6)
|
63
66
|
drb (2.1.1)
|
64
67
|
ruby2_keywords
|
@@ -162,9 +165,9 @@ GEM
|
|
162
165
|
ruby-progressbar (1.13.0)
|
163
166
|
ruby2_keywords (0.0.5)
|
164
167
|
rubyzip (2.3.2)
|
165
|
-
selenium-devtools (0.
|
168
|
+
selenium-devtools (0.120.0)
|
166
169
|
selenium-webdriver (~> 4.2)
|
167
|
-
selenium-webdriver (4.
|
170
|
+
selenium-webdriver (4.16.0)
|
168
171
|
rexml (~> 3.2, >= 3.2.5)
|
169
172
|
rubyzip (>= 1.2.2, < 3.0)
|
170
173
|
websocket (~> 1.0)
|
@@ -189,6 +192,7 @@ PLATFORMS
|
|
189
192
|
|
190
193
|
DEPENDENCIES
|
191
194
|
curb (~> 1.0, >= 1.0.5)
|
195
|
+
debug
|
192
196
|
dotenv (~> 2.7.6)
|
193
197
|
minitest (~> 5.0)
|
194
198
|
rack (= 3.0.8)
|
data/lib/zorki/scrapers/scraper.rb
CHANGED
@@ -114,27 +114,32 @@ module Zorki
|
|
114
114
|
# e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
|
115
115
|
# end
|
116
116
|
|
117
|
-
elements = doc.search("script").
|
118
|
-
|
117
|
+
elements = doc.search("script").filter_map do |element|
|
118
|
+
parsed_element_json = nil
|
119
119
|
begin
|
120
|
-
element_json = JSON.parse(element)
|
120
|
+
element_json = JSON.parse(element.text)
|
121
121
|
|
122
|
-
|
123
|
-
rescue StandardError
|
122
|
+
parsed_element_json = element_json["require"].first.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
|
123
|
+
rescue StandardError
|
124
124
|
next
|
125
125
|
end
|
126
126
|
|
127
|
-
|
128
|
-
end
|
127
|
+
parsed_element_json
|
128
|
+
end
|
129
129
|
|
130
130
|
if elements&.empty?
|
131
|
-
raise ContentUnavailableError
|
131
|
+
raise ContentUnavailableError.new("Cannot find anything", additional_data: { page_source: page.driver.browser.page_source, elements: elements })
|
132
132
|
end
|
133
|
+
|
133
134
|
return elements
|
134
135
|
end
|
135
136
|
|
136
|
-
raise ContentUnavailableError if response_body.nil?
|
137
|
+
raise ContentUnavailableError.new("Response body nil") if response_body.nil?
|
137
138
|
Oj.load(response_body)
|
139
|
+
ensure
|
140
|
+
page.quit
|
141
|
+
# TRY THIS TO MAKE SURE CHROME GETS CLOSED?
|
142
|
+
# We may also want to not do this and make sure the same browser is reused instead for cookie purposes
|
138
143
|
end
|
139
144
|
|
140
145
|
private
|
@@ -167,7 +172,7 @@ module Zorki
|
|
167
172
|
|
168
173
|
def login
|
169
174
|
# Reset the sessions so that there's nothing laying around
|
170
|
-
page.
|
175
|
+
# page.driver.browser.close
|
171
176
|
|
172
177
|
# Check if we're on a Instagram page already, if not visit it.
|
173
178
|
unless page.driver.browser.current_url.include? "instagram.com"
|
@@ -242,6 +247,15 @@ module Zorki
|
|
242
247
|
# Multiply everything and insure we get an integer back
|
243
248
|
(number * multiplier).to_i
|
244
249
|
end
|
250
|
+
|
251
|
+
# def reset_window
|
252
|
+
# old_handle = page.driver.browser.window_handle
|
253
|
+
# page.driver.browser.switch_to.new_window(:window)
|
254
|
+
# new_handle = page.driver.browser.window_handle
|
255
|
+
# page.driver.browser.switch_to.window(old_handle)
|
256
|
+
# page.driver.browser.close
|
257
|
+
# page.driver.browser.switch_to.window(new_handle)
|
258
|
+
# end
|
245
259
|
end
|
246
260
|
end
|
247
261
|
|
data/lib/zorki/scrapers/user_scraper.rb
CHANGED
@@ -21,6 +21,10 @@ module Zorki
|
|
21
21
|
graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "?username=")
|
22
22
|
graphql_script = graphql_script.first if graphql_script.class == Array
|
23
23
|
|
24
|
+
if graphql_script.nil?
|
25
|
+
graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "web_profile_info")
|
26
|
+
end
|
27
|
+
|
24
28
|
if graphql_script.has_key?("author") && !graphql_script["author"].nil?
|
25
29
|
user = graphql_script["author"]
|
26
30
|
|
data/lib/zorki/version.rb
CHANGED
data/lib/zorki.rb
CHANGED
@@ -20,8 +20,15 @@ module Zorki
|
|
20
20
|
end
|
21
21
|
|
22
22
|
class ContentUnavailableError < Error
|
23
|
-
|
24
|
-
|
23
|
+
attr_reader :additional_data
|
24
|
+
|
25
|
+
def initialize(msg = "Zorki could not find content requested", additional_data: {})
|
26
|
+
super(msg)
|
27
|
+
@additional_data = additional_data
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_honeybadger_context
|
31
|
+
additional_data
|
25
32
|
end
|
26
33
|
end
|
27
34
|
|
data/zorki.gemspec
CHANGED
@@ -29,8 +29,6 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
30
30
|
spec.require_paths = ["lib"]
|
31
31
|
|
32
|
-
# Uncomment to register a new dependency of your gem
|
33
|
-
# spec.add_dependency "example-gem", "~> 1.0"
|
34
32
|
spec.add_dependency "capybara" # For scraping and running browsers
|
35
33
|
spec.add_dependency "apparition" # A Chrome driver for Capybara
|
36
34
|
spec.add_dependency "typhoeus" # For making API requests
|
@@ -38,6 +36,8 @@ Gem::Specification.new do |spec|
|
|
38
36
|
spec.add_dependency "selenium-webdriver" # Webdriver selenium
|
39
37
|
spec.add_dependency "selenium-devtools" # Allow us to intercept requests
|
40
38
|
|
39
|
+
spec.add_development_dependency "debug"
|
40
|
+
|
41
41
|
# For more information and examples about making a new gem, checkout our
|
42
42
|
# guide at: https://bundler.io/guides/creating_gem.html
|
43
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zorki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christopher Guess
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: debug
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
description:
|
98
112
|
email:
|
99
113
|
- cguess@gmail.com
|