zorki 0.1.22 → 0.1.23
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/zorki/monkeypatch.rb +1 -1
- data/lib/zorki/scrapers/post_scraper.rb +5 -0
- data/lib/zorki/scrapers/scraper.rb +14 -3
- data/lib/zorki/scrapers/user_scraper.rb +5 -5
- data/lib/zorki/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59b7b2ee84517a52b4c4bfaa300fe18d5981789c92c5983cc07d2cbe21323a79
|
4
|
+
data.tar.gz: 6d24c1955520c08164da77783dbbc17d8db09edfa8de30103a3ddfcdaeaf8064
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3bb4c7df50e497920f6e21216268a2640128f0b0fee6d443827ffe372f720a63b03fe6ad0c62d2a78862698989e7b62929bd9d0e9c54e5dcf12c8a5ccca85719
|
7
|
+
data.tar.gz: 1cee6eafb78aaf2bcadc855f44676501996ce83c9fc5a3ecafc5303fc7a62d2ee7aa0730b78477c8578fd1edc3e897317ee6014cfd35b38d5743816ebf8307e4
|
data/lib/zorki/monkeypatch.rb
CHANGED
@@ -41,7 +41,7 @@ module SeleniumMonkeypatch
|
|
41
41
|
data[:sessionId] = @session_id if @session_id
|
42
42
|
message = @ws.send_cmd(**data)
|
43
43
|
if message.nil? == false && message["error"] && (method != "Fetch.continueRequest")
|
44
|
-
raise Error::WebDriverError, error_message(message["error"])
|
44
|
+
raise Selenium::WebDriver::Error::WebDriverError, error_message(message["error"])
|
45
45
|
end
|
46
46
|
|
47
47
|
message
|
data/lib/zorki/scrapers/post_scraper.rb
CHANGED
@@ -23,6 +23,9 @@ module Zorki
|
|
23
23
|
raise ImageRequestZeroSize if count == 5
|
24
24
|
|
25
25
|
result
|
26
|
+
ensure
|
27
|
+
page.quit
|
28
|
+
# Make sure it's quit? I'm not sure we really want to do this outside of testing.
|
26
29
|
end
|
27
30
|
|
28
31
|
def attempt_parse(id)
|
@@ -44,6 +47,7 @@ module Zorki
|
|
44
47
|
"data,xdt_api__v1__media__shortcode__web_info,items"
|
45
48
|
)
|
46
49
|
|
50
|
+
|
47
51
|
graphql_object = graphql_object.first if graphql_object.kind_of?(Array)
|
48
52
|
|
49
53
|
# For pages that have been marked misinfo the structure is very different than not
|
@@ -61,6 +65,7 @@ module Zorki
|
|
61
65
|
text = graphql_object["articleBody"]
|
62
66
|
username = graphql_object["author"]["identifier"]["value"]
|
63
67
|
# 2021-04-01T17:07:10-07:00
|
68
|
+
|
64
69
|
date = DateTime.strptime(graphql_object["dateCreated"], "%Y-%m-%dT%H:%M:%S%z")
|
65
70
|
interactions = graphql_object["interactionStatistic"]
|
66
71
|
number_of_likes = interactions.select do |x|
|
data/lib/zorki/scrapers/scraper.rb
CHANGED
@@ -72,8 +72,9 @@ module Zorki
|
|
72
72
|
|
73
73
|
continue.call(request) do |response|
|
74
74
|
# Check if not a CORS prefetch and finish up if not
|
75
|
-
if !response.body
|
75
|
+
if !response.body&.empty? && response.body
|
76
76
|
check_passed = true
|
77
|
+
|
77
78
|
unless additional_search_parameters.nil?
|
78
79
|
body_to_check = Oj.load(response.body)
|
79
80
|
|
@@ -119,7 +120,16 @@ module Zorki
|
|
119
120
|
begin
|
120
121
|
element_json = JSON.parse(element.text)
|
121
122
|
|
122
|
-
|
123
|
+
if element.text.include?("Dwayne")
|
124
|
+
# if element_json["require"].first.last.first["__bbox"].key?("require")
|
125
|
+
|
126
|
+
# element_json["require"].first.last.first["__bbox"]["require"].each do |x|
|
127
|
+
# debugger if x.to_s.include?("Dwayne Johnson")
|
128
|
+
# end
|
129
|
+
# end
|
130
|
+
end
|
131
|
+
|
132
|
+
parsed_element_json = element_json["require"].last.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
|
123
133
|
rescue StandardError
|
124
134
|
next
|
125
135
|
end
|
@@ -137,9 +147,10 @@ module Zorki
|
|
137
147
|
raise ContentUnavailableError.new("Response body nil") if response_body.nil?
|
138
148
|
Oj.load(response_body)
|
139
149
|
ensure
|
140
|
-
page.quit
|
150
|
+
# page.quit
|
141
151
|
# TRY THIS TO MAKE SURE CHROME GETS CLOSED?
|
142
152
|
# We may also want to not do this and make sure the same browser is reused instead for cookie purposes
|
153
|
+
# NOW we're trying this 2024-05-28
|
143
154
|
end
|
144
155
|
|
145
156
|
private
|
data/lib/zorki/scrapers/user_scraper.rb
CHANGED
@@ -18,7 +18,7 @@ module Zorki
|
|
18
18
|
# - *Profile image
|
19
19
|
login
|
20
20
|
|
21
|
-
graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "
|
21
|
+
graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql", "data,user,full_name")
|
22
22
|
graphql_script = graphql_script.first if graphql_script.class == Array
|
23
23
|
|
24
24
|
if graphql_script.nil?
|
@@ -62,13 +62,13 @@ module Zorki
|
|
62
62
|
scraped_username = user["username"]
|
63
63
|
raise Zorki::Error unless username == scraped_username
|
64
64
|
|
65
|
-
profile_image_url = user["
|
65
|
+
profile_image_url = user["hd_profile_pic_url_info"]["url"]
|
66
66
|
{
|
67
67
|
name: user["full_name"],
|
68
68
|
username: username,
|
69
|
-
number_of_posts: user["
|
70
|
-
number_of_followers: user["
|
71
|
-
number_of_following: user["
|
69
|
+
number_of_posts: user["media_count"],
|
70
|
+
number_of_followers: user["follower_count"],
|
71
|
+
number_of_following: user["following_count"],
|
72
72
|
verified: user["is_verified"],
|
73
73
|
profile: user["biography"],
|
74
74
|
profile_link: user["external_url"],
|
data/lib/zorki/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zorki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.22
|
4
|
+
version: 0.1.23
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christopher Guess
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|