zorki 0.1.22 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/zorki/monkeypatch.rb +1 -1
- data/lib/zorki/scrapers/post_scraper.rb +5 -0
- data/lib/zorki/scrapers/scraper.rb +14 -3
- data/lib/zorki/scrapers/user_scraper.rb +5 -5
- data/lib/zorki/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59b7b2ee84517a52b4c4bfaa300fe18d5981789c92c5983cc07d2cbe21323a79
|
4
|
+
data.tar.gz: 6d24c1955520c08164da77783dbbc17d8db09edfa8de30103a3ddfcdaeaf8064
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3bb4c7df50e497920f6e21216268a2640128f0b0fee6d443827ffe372f720a63b03fe6ad0c62d2a78862698989e7b62929bd9d0e9c54e5dcf12c8a5ccca85719
|
7
|
+
data.tar.gz: 1cee6eafb78aaf2bcadc855f44676501996ce83c9fc5a3ecafc5303fc7a62d2ee7aa0730b78477c8578fd1edc3e897317ee6014cfd35b38d5743816ebf8307e4
|
data/lib/zorki/monkeypatch.rb
CHANGED
@@ -41,7 +41,7 @@ module SeleniumMonkeypatch
|
|
41
41
|
data[:sessionId] = @session_id if @session_id
|
42
42
|
message = @ws.send_cmd(**data)
|
43
43
|
if message.nil? == false && message["error"] && (method != "Fetch.continueRequest")
|
44
|
-
raise Error::WebDriverError, error_message(message["error"])
|
44
|
+
raise Selenium::WebDriver::Error::WebDriverError, error_message(message["error"])
|
45
45
|
end
|
46
46
|
|
47
47
|
message
|
data/lib/zorki/scrapers/post_scraper.rb
CHANGED
@@ -23,6 +23,9 @@ module Zorki
|
|
23
23
|
raise ImageRequestZeroSize if count == 5
|
24
24
|
|
25
25
|
result
|
26
|
+
ensure
|
27
|
+
page.quit
|
28
|
+
# Make sure it's quit? I'm not sure we really want to do this outside of testing.
|
26
29
|
end
|
27
30
|
|
28
31
|
def attempt_parse(id)
|
@@ -44,6 +47,7 @@ module Zorki
|
|
44
47
|
"data,xdt_api__v1__media__shortcode__web_info,items"
|
45
48
|
)
|
46
49
|
|
50
|
+
|
47
51
|
graphql_object = graphql_object.first if graphql_object.kind_of?(Array)
|
48
52
|
|
49
53
|
# For pages that have been marked misinfo the structure is very different than not
|
@@ -61,6 +65,7 @@ module Zorki
|
|
61
65
|
text = graphql_object["articleBody"]
|
62
66
|
username = graphql_object["author"]["identifier"]["value"]
|
63
67
|
# 2021-04-01T17:07:10-07:00
|
68
|
+
|
64
69
|
date = DateTime.strptime(graphql_object["dateCreated"], "%Y-%m-%dT%H:%M:%S%z")
|
65
70
|
interactions = graphql_object["interactionStatistic"]
|
66
71
|
number_of_likes = interactions.select do |x|
|
data/lib/zorki/scrapers/scraper.rb
CHANGED
@@ -72,8 +72,9 @@ module Zorki
|
|
72
72
|
|
73
73
|
continue.call(request) do |response|
|
74
74
|
# Check if not a CORS prefetch and finish up if not
|
75
|
-
if !response.body
|
75
|
+
if !response.body&.empty? && response.body
|
76
76
|
check_passed = true
|
77
|
+
|
77
78
|
unless additional_search_parameters.nil?
|
78
79
|
body_to_check = Oj.load(response.body)
|
79
80
|
|
@@ -119,7 +120,16 @@ module Zorki
|
|
119
120
|
begin
|
120
121
|
element_json = JSON.parse(element.text)
|
121
122
|
|
122
|
-
|
123
|
+
if element.text.include?("Dwayne")
|
124
|
+
# if element_json["require"].first.last.first["__bbox"].key?("require")
|
125
|
+
|
126
|
+
# element_json["require"].first.last.first["__bbox"]["require"].each do |x|
|
127
|
+
# debugger if x.to_s.include?("Dwayne Johnson")
|
128
|
+
# end
|
129
|
+
# end
|
130
|
+
end
|
131
|
+
|
132
|
+
parsed_element_json = element_json["require"].last.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
|
123
133
|
rescue StandardError
|
124
134
|
next
|
125
135
|
end
|
@@ -137,9 +147,10 @@ module Zorki
|
|
137
147
|
raise ContentUnavailableError.new("Response body nil") if response_body.nil?
|
138
148
|
Oj.load(response_body)
|
139
149
|
ensure
|
140
|
-
page.quit
|
150
|
+
# page.quit
|
141
151
|
# TRY THIS TO MAKE SURE CHROME GETS CLOSED?
|
142
152
|
# We may also want to not do this and make sure the same browser is reused instead for cookie purposes
|
153
|
+
# NOW wer'e trying this 2024-05-28
|
143
154
|
end
|
144
155
|
|
145
156
|
private
|
data/lib/zorki/scrapers/user_scraper.rb
CHANGED
@@ -18,7 +18,7 @@ module Zorki
|
|
18
18
|
# - *Profile image
|
19
19
|
login
|
20
20
|
|
21
|
-
graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "
|
21
|
+
graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql", "data,user,full_name")
|
22
22
|
graphql_script = graphql_script.first if graphql_script.class == Array
|
23
23
|
|
24
24
|
if graphql_script.nil?
|
@@ -62,13 +62,13 @@ module Zorki
|
|
62
62
|
scraped_username = user["username"]
|
63
63
|
raise Zorki::Error unless username == scraped_username
|
64
64
|
|
65
|
-
profile_image_url = user["
|
65
|
+
profile_image_url = user["hd_profile_pic_url_info"]["url"]
|
66
66
|
{
|
67
67
|
name: user["full_name"],
|
68
68
|
username: username,
|
69
|
-
number_of_posts: user["
|
70
|
-
number_of_followers: user["
|
71
|
-
number_of_following: user["
|
69
|
+
number_of_posts: user["media_count"],
|
70
|
+
number_of_followers: user["follower_count"],
|
71
|
+
number_of_following: user["following_count"],
|
72
72
|
verified: user["is_verified"],
|
73
73
|
profile: user["biography"],
|
74
74
|
profile_link: user["external_url"],
|
data/lib/zorki/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zorki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christopher Guess
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|