zorki 0.1.22 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e6710ae1d1737110bc8385113470b9523414c74ff3ed88e0fe507215ddaa24c
4
- data.tar.gz: 27df7892036e33313229ffefef35a6407667fd3d36275bee9ff933a07178a102
3
+ metadata.gz: fa95bb5ca131ca6b7faed2aab200329579b77012cb079da75a0ad90a60daa5bd
4
+ data.tar.gz: 82eb0cc29af2cfaeafa8774e027904959f953a50faa03d71dbcbeba9595ac520
5
5
  SHA512:
6
- metadata.gz: 6147d1586fbb95a28a018139e8bcb71a1f5731a3a15c65767ab43d0f29395db98f0a18f13080aeb2cd9eb82c909db9a65931ca6100beabbc08be01c343729e92
7
- data.tar.gz: 4bac8fc6d924404f1671c302c16e5eb895bef4ab9a63c03b5951b4f4c6f9e6314073468ebb5ad4bad3bda0819afcd096a53160950439e044fb55922f80c460cc
6
+ metadata.gz: 127638ce83ed09be71f194ba3e5dc269374890a1bd89874c8f158896a7664d41724698a258916f63e373b1c6f3dd3b4d315fac49a15e17ff457035ec4d345120
7
+ data.tar.gz: 105b7e148774a82640ed48bffe46b21d941ce0a18987f59ef0e79a3a058077fbd9dda651811bbc421ff9fcc8323de152b2ac5afbff0e5b9d4e5c65d27f5ac85c
@@ -41,7 +41,7 @@ module SeleniumMonkeypatch
41
41
  data[:sessionId] = @session_id if @session_id
42
42
  message = @ws.send_cmd(**data)
43
43
  if message.nil? == false && message["error"] && (method != "Fetch.continueRequest")
44
- raise Error::WebDriverError, error_message(message["error"])
44
+ raise Selenium::WebDriver::Error::WebDriverError, error_message(message["error"])
45
45
  end
46
46
 
47
47
  message
@@ -23,6 +23,9 @@ module Zorki
23
23
  raise ImageRequestZeroSize if count == 5
24
24
 
25
25
  result
26
+ ensure
27
+ page.quit
28
+ # Make sure it's quit? I'm not sure we really want to do this outside of testing.
26
29
  end
27
30
 
28
31
  def attempt_parse(id)
@@ -44,6 +47,7 @@ module Zorki
44
47
  "data,xdt_api__v1__media__shortcode__web_info,items"
45
48
  )
46
49
 
50
+
47
51
  graphql_object = graphql_object.first if graphql_object.kind_of?(Array)
48
52
 
49
53
  # For pages that have been marked misinfo the structure is very different than not
@@ -61,6 +65,7 @@ module Zorki
61
65
  text = graphql_object["articleBody"]
62
66
  username = graphql_object["author"]["identifier"]["value"]
63
67
  # 2021-04-01T17:07:10-07:00
68
+
64
69
  date = DateTime.strptime(graphql_object["dateCreated"], "%Y-%m-%dT%H:%M:%S%z")
65
70
  interactions = graphql_object["interactionStatistic"]
66
71
  number_of_likes = interactions.select do |x|
@@ -72,11 +72,14 @@ module Zorki
72
72
 
73
73
  continue.call(request) do |response|
74
74
  # Check if not a CORS prefetch and finish up if not
75
- if !response.body.empty? && response.body
75
+ if !response.body&.empty? && response.body
76
76
  check_passed = true
77
+
77
78
  unless additional_search_parameters.nil?
78
79
  body_to_check = Oj.load(response.body)
79
80
 
81
+ debugger if body_to_check.include?("jokoy.komi.io")
82
+
80
83
  search_parameters = additional_search_parameters.split(",")
81
84
  search_parameters.each_with_index do |key, index|
82
85
  break if body_to_check.nil?
@@ -119,7 +122,17 @@ module Zorki
119
122
  begin
120
123
  element_json = JSON.parse(element.text)
121
124
 
122
- parsed_element_json = element_json["require"].first.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
125
+ if element.text.include?("jokoy.komi.io")
126
+ debugger
127
+ # if element_json["require"].first.last.first["__bbox"].key?("require")
128
+
129
+ # element_json["require"].first.last.first["__bbox"]["require"].each do |x|
130
+ # debugger if x.to_s.include?("Si mulut pelaut")
131
+ # end
132
+ # end
133
+ end
134
+
135
+ parsed_element_json = element_json["require"].last.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
123
136
  rescue StandardError
124
137
  next
125
138
  end
@@ -137,9 +150,10 @@ module Zorki
137
150
  raise ContentUnavailableError.new("Response body nil") if response_body.nil?
138
151
  Oj.load(response_body)
139
152
  ensure
140
- page.quit
153
+ # page.quit
141
154
  # TRY THIS TO MAKE SURE CHROME GETS CLOSED?
142
155
  # We may also want to not do this and make sure the same browser is reused instead for cookie purposes
156
+ # NOW wer'e trying this 2024-05-28
143
157
  end
144
158
 
145
159
  private
@@ -18,7 +18,7 @@ module Zorki
18
18
  # - *Profile image
19
19
  login
20
20
 
21
- graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "?username=")
21
+ graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql", "data,user,full_name")
22
22
  graphql_script = graphql_script.first if graphql_script.class == Array
23
23
 
24
24
  if graphql_script.nil?
@@ -62,13 +62,13 @@ module Zorki
62
62
  scraped_username = user["username"]
63
63
  raise Zorki::Error unless username == scraped_username
64
64
 
65
- profile_image_url = user["profile_pic_url_hd"]
65
+ profile_image_url = user["hd_profile_pic_url_info"]["url"]
66
66
  {
67
67
  name: user["full_name"],
68
68
  username: username,
69
- number_of_posts: user["edge_owner_to_timeline_media"]["count"],
70
- number_of_followers: user["edge_followed_by"]["count"],
71
- number_of_following: user["edge_follow"]["count"],
69
+ number_of_posts: user["media_count"],
70
+ number_of_followers: user["follower_count"],
71
+ number_of_following: user["following_count"],
72
72
  verified: user["is_verified"],
73
73
  profile: user["biography"],
74
74
  profile_link: user["external_url"],
@@ -76,6 +76,8 @@ module Zorki
76
76
  profile_image_url: profile_image_url
77
77
  }
78
78
  end
79
+ rescue Zorki::ContentUnavailableError
80
+ raise Zorki::UserScrapingError.new("Zorki could not find user #{username}", additional_data: { username: username })
79
81
  end
80
82
  end
81
83
  end
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.22"
4
+ VERSION = "0.1.24"
5
5
  end
data/lib/zorki.rb CHANGED
@@ -32,6 +32,17 @@ module Zorki
32
32
  end
33
33
  end
34
34
 
35
+ class UserScrapingError < Error
36
+ def initialize(msg = "Zorki encountered an error scraping a user", additional_data: {})
37
+ super(msg)
38
+ @additional_data = additional_data
39
+ end
40
+
41
+ def to_honeybadger_context
42
+ additional_data
43
+ end
44
+ end
45
+
35
46
  class RetryableError < Error; end
36
47
 
37
48
  class ImageRequestTimedOutError < RetryableError
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.22
4
+ version: 0.1.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-29 00:00:00.000000000 Z
11
+ date: 2024-06-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara