zorki 0.1.22 → 0.1.24

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e6710ae1d1737110bc8385113470b9523414c74ff3ed88e0fe507215ddaa24c
4
- data.tar.gz: 27df7892036e33313229ffefef35a6407667fd3d36275bee9ff933a07178a102
3
+ metadata.gz: fa95bb5ca131ca6b7faed2aab200329579b77012cb079da75a0ad90a60daa5bd
4
+ data.tar.gz: 82eb0cc29af2cfaeafa8774e027904959f953a50faa03d71dbcbeba9595ac520
5
5
  SHA512:
6
- metadata.gz: 6147d1586fbb95a28a018139e8bcb71a1f5731a3a15c65767ab43d0f29395db98f0a18f13080aeb2cd9eb82c909db9a65931ca6100beabbc08be01c343729e92
7
- data.tar.gz: 4bac8fc6d924404f1671c302c16e5eb895bef4ab9a63c03b5951b4f4c6f9e6314073468ebb5ad4bad3bda0819afcd096a53160950439e044fb55922f80c460cc
6
+ metadata.gz: 127638ce83ed09be71f194ba3e5dc269374890a1bd89874c8f158896a7664d41724698a258916f63e373b1c6f3dd3b4d315fac49a15e17ff457035ec4d345120
7
+ data.tar.gz: 105b7e148774a82640ed48bffe46b21d941ce0a18987f59ef0e79a3a058077fbd9dda651811bbc421ff9fcc8323de152b2ac5afbff0e5b9d4e5c65d27f5ac85c
@@ -41,7 +41,7 @@ module SeleniumMonkeypatch
41
41
  data[:sessionId] = @session_id if @session_id
42
42
  message = @ws.send_cmd(**data)
43
43
  if message.nil? == false && message["error"] && (method != "Fetch.continueRequest")
44
- raise Error::WebDriverError, error_message(message["error"])
44
+ raise Selenium::WebDriver::Error::WebDriverError, error_message(message["error"])
45
45
  end
46
46
 
47
47
  message
@@ -23,6 +23,9 @@ module Zorki
23
23
  raise ImageRequestZeroSize if count == 5
24
24
 
25
25
  result
26
+ ensure
27
+ page.quit
28
+ # Make sure it's quit? I'm not sure we really want to do this outside of testing.
26
29
  end
27
30
 
28
31
  def attempt_parse(id)
@@ -44,6 +47,7 @@ module Zorki
44
47
  "data,xdt_api__v1__media__shortcode__web_info,items"
45
48
  )
46
49
 
50
+
47
51
  graphql_object = graphql_object.first if graphql_object.kind_of?(Array)
48
52
 
49
53
  # For pages that have been marked as misinfo the structure is very different from pages that have not
@@ -61,6 +65,7 @@ module Zorki
61
65
  text = graphql_object["articleBody"]
62
66
  username = graphql_object["author"]["identifier"]["value"]
63
67
  # 2021-04-01T17:07:10-07:00
68
+
64
69
  date = DateTime.strptime(graphql_object["dateCreated"], "%Y-%m-%dT%H:%M:%S%z")
65
70
  interactions = graphql_object["interactionStatistic"]
66
71
  number_of_likes = interactions.select do |x|
@@ -72,11 +72,14 @@ module Zorki
72
72
 
73
73
  continue.call(request) do |response|
74
74
  # Check if not a CORS prefetch and finish up if not
75
- if !response.body.empty? && response.body
75
+ if !response.body&.empty? && response.body
76
76
  check_passed = true
77
+
77
78
  unless additional_search_parameters.nil?
78
79
  body_to_check = Oj.load(response.body)
79
80
 
81
+ debugger if body_to_check.include?("jokoy.komi.io")
82
+
80
83
  search_parameters = additional_search_parameters.split(",")
81
84
  search_parameters.each_with_index do |key, index|
82
85
  break if body_to_check.nil?
@@ -119,7 +122,17 @@ module Zorki
119
122
  begin
120
123
  element_json = JSON.parse(element.text)
121
124
 
122
- parsed_element_json = element_json["require"].first.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
125
+ if element.text.include?("jokoy.komi.io")
126
+ debugger
127
+ # if element_json["require"].first.last.first["__bbox"].key?("require")
128
+
129
+ # element_json["require"].first.last.first["__bbox"]["require"].each do |x|
130
+ # debugger if x.to_s.include?("Si mulut pelaut")
131
+ # end
132
+ # end
133
+ end
134
+
135
+ parsed_element_json = element_json["require"].last.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
123
136
  rescue StandardError
124
137
  next
125
138
  end
@@ -137,9 +150,10 @@ module Zorki
137
150
  raise ContentUnavailableError.new("Response body nil") if response_body.nil?
138
151
  Oj.load(response_body)
139
152
  ensure
140
- page.quit
153
+ # page.quit
141
154
  # TRY THIS TO MAKE SURE CHROME GETS CLOSED?
142
155
  # We may also want to not do this and make sure the same browser is reused instead for cookie purposes
156
+ # NOW we're trying this 2024-05-28
143
157
  end
144
158
 
145
159
  private
@@ -18,7 +18,7 @@ module Zorki
18
18
  # - *Profile image
19
19
  login
20
20
 
21
- graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "?username=")
21
+ graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql", "data,user,full_name")
22
22
  graphql_script = graphql_script.first if graphql_script.class == Array
23
23
 
24
24
  if graphql_script.nil?
@@ -62,13 +62,13 @@ module Zorki
62
62
  scraped_username = user["username"]
63
63
  raise Zorki::Error unless username == scraped_username
64
64
 
65
- profile_image_url = user["profile_pic_url_hd"]
65
+ profile_image_url = user["hd_profile_pic_url_info"]["url"]
66
66
  {
67
67
  name: user["full_name"],
68
68
  username: username,
69
- number_of_posts: user["edge_owner_to_timeline_media"]["count"],
70
- number_of_followers: user["edge_followed_by"]["count"],
71
- number_of_following: user["edge_follow"]["count"],
69
+ number_of_posts: user["media_count"],
70
+ number_of_followers: user["follower_count"],
71
+ number_of_following: user["following_count"],
72
72
  verified: user["is_verified"],
73
73
  profile: user["biography"],
74
74
  profile_link: user["external_url"],
@@ -76,6 +76,8 @@ module Zorki
76
76
  profile_image_url: profile_image_url
77
77
  }
78
78
  end
79
+ rescue Zorki::ContentUnavailableError
80
+ raise Zorki::UserScrapingError.new("Zorki could not find user #{username}", additional_data: { username: username })
79
81
  end
80
82
  end
81
83
  end
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.22"
4
+ VERSION = "0.1.24"
5
5
  end
data/lib/zorki.rb CHANGED
@@ -32,6 +32,17 @@ module Zorki
32
32
  end
33
33
  end
34
34
 
35
+ class UserScrapingError < Error
36
+ def initialize(msg = "Zorki encountered an error scraping a user", additional_data: {})
37
+ super(msg)
38
+ @additional_data = additional_data
39
+ end
40
+
41
+ def to_honeybadger_context
42
+ additional_data
43
+ end
44
+ end
45
+
35
46
  class RetryableError < Error; end
36
47
 
37
48
  class ImageRequestTimedOutError < RetryableError
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.22
4
+ version: 0.1.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-29 00:00:00.000000000 Z
11
+ date: 2024-06-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara