zorki 0.1.23 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 59b7b2ee84517a52b4c4bfaa300fe18d5981789c92c5983cc07d2cbe21323a79
4
- data.tar.gz: 6d24c1955520c08164da77783dbbc17d8db09edfa8de30103a3ddfcdaeaf8064
3
+ metadata.gz: ec1dba1c80d66f33d1e50fc104511a5842091529c6d2b1e3f4758508ae37468a
4
+ data.tar.gz: b510d48cbc12d0e9b70fb17fb91aeffdad51d85f85e5813695df1c86279894db
5
5
  SHA512:
6
- metadata.gz: 3bb4c7df50e497920f6e21216268a2640128f0b0fee6d443827ffe372f720a63b03fe6ad0c62d2a78862698989e7b62929bd9d0e9c54e5dcf12c8a5ccca85719
7
- data.tar.gz: 1cee6eafb78aaf2bcadc855f44676501996ce83c9fc5a3ecafc5303fc7a62d2ee7aa0730b78477c8578fd1edc3e897317ee6014cfd35b38d5743816ebf8307e4
6
+ metadata.gz: 6463fc59b818e21d4c515212d04549b7a3b925433d7e9df79ff376a786f74e028ab99146b8422c64384470443d88835854717e3a19a7c68bb3a767e7277f1c39
7
+ data.tar.gz: 173b570bb5eb62d759a488c21aff0092c633db1f0f1a4fa771e46ad32141a2016386f698e724cbc1a7dced5ad66a9d7b22add8f7dd533d8dc88ec009ca9a2814
data/Gemfile.lock CHANGED
@@ -1,35 +1,36 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- zorki (0.1.9)
4
+ zorki (0.1.25)
5
5
  apparition
6
6
  capybara
7
7
  oj
8
8
  selenium-devtools
9
- selenium-webdriver (~> 4.19)
9
+ selenium-webdriver (~> 4.24.0)
10
10
  typhoeus
11
11
 
12
12
  GEM
13
13
  remote: https://rubygems.org/
14
14
  specs:
15
- activesupport (7.1.3.2)
15
+ activesupport (7.2.1)
16
16
  base64
17
17
  bigdecimal
18
- concurrent-ruby (~> 1.0, >= 1.0.2)
18
+ concurrent-ruby (~> 1.0, >= 1.3.1)
19
19
  connection_pool (>= 2.2.5)
20
20
  drb
21
21
  i18n (>= 1.6, < 2)
22
+ logger (>= 1.4.2)
22
23
  minitest (>= 5.1)
23
- mutex_m
24
- tzinfo (~> 2.0)
25
- addressable (2.8.6)
26
- public_suffix (>= 2.0.2, < 6.0)
24
+ securerandom (>= 0.3)
25
+ tzinfo (~> 2.0, >= 2.0.5)
26
+ addressable (2.8.7)
27
+ public_suffix (>= 2.0.2, < 7.0)
27
28
  apparition (0.6.0)
28
29
  capybara (~> 3.13, < 4)
29
30
  websocket-driver (>= 0.6.5)
30
31
  ast (2.4.2)
31
32
  base64 (0.2.0)
32
- bigdecimal (3.1.7)
33
+ bigdecimal (3.1.8)
33
34
  capybara (3.40.0)
34
35
  addressable
35
36
  matrix
@@ -39,75 +40,76 @@ GEM
39
40
  rack-test (>= 0.6.3)
40
41
  regexp_parser (>= 1.5, < 3.0)
41
42
  xpath (~> 3.2)
42
- concurrent-ruby (1.2.3)
43
+ concurrent-ruby (1.3.4)
43
44
  connection_pool (2.4.1)
44
- curb (1.0.5)
45
- debug (1.9.1)
45
+ curb (1.0.6)
46
+ debug (1.9.2)
46
47
  irb (~> 1.10)
47
48
  reline (>= 0.3.8)
48
49
  dotenv (2.7.6)
49
50
  drb (2.2.1)
50
51
  ethon (0.16.0)
51
52
  ffi (>= 1.15.0)
52
- ffi (1.16.3)
53
- i18n (1.14.4)
53
+ ffi (1.17.0-arm64-darwin)
54
+ i18n (1.14.5)
54
55
  concurrent-ruby (~> 1.0)
55
56
  io-console (0.7.2)
56
- irb (1.12.0)
57
- rdoc
57
+ irb (1.14.0)
58
+ rdoc (>= 4.0.0)
58
59
  reline (>= 0.4.2)
59
- json (2.7.1)
60
+ json (2.7.2)
60
61
  language_server-protocol (3.17.0.3)
62
+ logger (1.6.1)
61
63
  matrix (0.4.2)
62
64
  mini_mime (1.1.5)
63
- minitest (5.22.3)
64
- mutex_m (0.2.0)
65
- nokogiri (1.16.3-arm64-darwin)
65
+ minitest (5.25.1)
66
+ nokogiri (1.16.7-arm64-darwin)
66
67
  racc (~> 1.4)
67
- oj (3.16.3)
68
+ oj (3.16.6)
68
69
  bigdecimal (>= 3.0)
69
- parallel (1.24.0)
70
- parser (3.3.0.5)
70
+ ostruct (>= 0.2)
71
+ ostruct (0.6.0)
72
+ parallel (1.26.3)
73
+ parser (3.3.5.0)
71
74
  ast (~> 2.4.1)
72
75
  racc
73
76
  psych (5.1.2)
74
77
  stringio
75
- public_suffix (5.0.4)
76
- racc (1.7.3)
78
+ public_suffix (6.0.1)
79
+ racc (1.8.1)
77
80
  rack (3.0.8)
78
81
  rack-test (2.1.0)
79
82
  rack (>= 1.3)
80
83
  rainbow (3.1.1)
81
- rake (13.1.0)
82
- rdoc (6.6.3.1)
84
+ rake (13.2.1)
85
+ rdoc (6.7.0)
83
86
  psych (>= 4.0.0)
84
- regexp_parser (2.9.0)
85
- reline (0.5.0)
87
+ regexp_parser (2.9.2)
88
+ reline (0.5.10)
86
89
  io-console (~> 0.5)
87
- rexml (3.2.6)
88
- rubocop (1.62.1)
90
+ rexml (3.3.7)
91
+ rubocop (1.66.1)
89
92
  json (~> 2.3)
90
93
  language_server-protocol (>= 3.17.0)
91
94
  parallel (~> 1.10)
92
95
  parser (>= 3.3.0.2)
93
96
  rainbow (>= 2.2.2, < 4.0)
94
- regexp_parser (>= 1.8, < 3.0)
95
- rexml (>= 3.2.5, < 4.0)
96
- rubocop-ast (>= 1.31.1, < 2.0)
97
+ regexp_parser (>= 2.4, < 3.0)
98
+ rubocop-ast (>= 1.32.2, < 2.0)
97
99
  ruby-progressbar (~> 1.7)
98
100
  unicode-display_width (>= 2.4.0, < 3.0)
99
- rubocop-ast (1.31.2)
100
- parser (>= 3.3.0.4)
101
- rubocop-md (1.2.2)
102
- rubocop (>= 1.0)
103
- rubocop-minitest (0.35.0)
101
+ rubocop-ast (1.32.3)
102
+ parser (>= 3.3.1.0)
103
+ rubocop-md (1.2.3)
104
+ rubocop (>= 1.45)
105
+ rubocop-minitest (0.36.0)
104
106
  rubocop (>= 1.61, < 2.0)
105
107
  rubocop-ast (>= 1.31.1, < 2.0)
106
108
  rubocop-packaging (0.5.2)
107
109
  rubocop (>= 1.33, < 2.0)
108
- rubocop-performance (1.20.2)
110
+ rubocop-performance (1.21.1)
109
111
  rubocop (>= 1.48.1, < 2.0)
110
- rubocop-ast (>= 1.30.0, < 2.0)
112
+ rubocop-ast (>= 1.31.1, < 2.0)
111
113
  rubocop-rails (2.19.1)
112
114
  activesupport (>= 4.2.0)
113
115
  rack (>= 1.1)
@@ -122,20 +124,22 @@ GEM
122
124
  rubocop-rails (~> 2.0)
123
125
  ruby-progressbar (1.13.0)
124
126
  rubyzip (2.3.2)
125
- selenium-devtools (0.123.0)
127
+ securerandom (0.3.1)
128
+ selenium-devtools (0.128.0)
126
129
  selenium-webdriver (~> 4.2)
127
- selenium-webdriver (4.19.0)
130
+ selenium-webdriver (4.24.0)
128
131
  base64 (~> 0.2)
132
+ logger (~> 1.4)
129
133
  rexml (~> 3.2, >= 3.2.5)
130
134
  rubyzip (>= 1.2.2, < 3.0)
131
135
  websocket (~> 1.0)
132
- stringio (3.1.0)
136
+ stringio (3.1.1)
133
137
  typhoeus (1.4.1)
134
138
  ethon (>= 0.9.0)
135
139
  tzinfo (2.0.6)
136
140
  concurrent-ruby (~> 1.0)
137
- unicode-display_width (2.5.0)
138
- websocket (1.2.10)
141
+ unicode-display_width (2.6.0)
142
+ websocket (1.2.11)
139
143
  websocket-driver (0.7.6)
140
144
  websocket-extensions (>= 0.1.0)
141
145
  websocket-extensions (0.1.5)
@@ -78,6 +78,8 @@ module Zorki
78
78
  unless additional_search_parameters.nil?
79
79
  body_to_check = Oj.load(response.body)
80
80
 
81
+ debugger if body_to_check.include?("jokoy.komi.io")
82
+
81
83
  search_parameters = additional_search_parameters.split(",")
82
84
  search_parameters.each_with_index do |key, index|
83
85
  break if body_to_check.nil?
@@ -120,11 +122,12 @@ module Zorki
120
122
  begin
121
123
  element_json = JSON.parse(element.text)
122
124
 
123
- if element.text.include?("Dwayne")
125
+ if element.text.include?("jokoy.komi.io")
126
+ debugger
124
127
  # if element_json["require"].first.last.first["__bbox"].key?("require")
125
128
 
126
129
  # element_json["require"].first.last.first["__bbox"]["require"].each do |x|
127
- # debugger if x.to_s.include?("Dwayne Johnson")
130
+ # debugger if x.to_s.include?("Si mulut pelaut")
128
131
  # end
129
132
  # end
130
133
  end
@@ -138,12 +141,14 @@ module Zorki
138
141
  end
139
142
 
140
143
  if elements&.empty?
144
+ # debugger
141
145
  raise ContentUnavailableError.new("Cannot find anything", additional_data: { page_source: page.driver.browser.page_source, elements: elements })
142
146
  end
143
147
 
144
148
  return elements
145
149
  end
146
150
 
151
+ # debugger if response_body.nil?
147
152
  raise ContentUnavailableError.new("Response body nil") if response_body.nil?
148
153
  Oj.load(response_body)
149
154
  ensure
@@ -16,13 +16,30 @@ module Zorki
16
16
  # - *description
17
17
  # - *links
18
18
  # - *Profile image
19
- login
20
19
 
21
- graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql", "data,user,full_name")
22
- graphql_script = graphql_script.first if graphql_script.class == Array
20
+ graphql_script = nil
21
+ count = 0
22
+ loop do
23
+ raise Zorki::UserScrapingError.new("Zorki could not find user #{username}", additional_data: { username: username }) if count > 3
23
24
 
24
- if graphql_script.nil?
25
- graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "web_profile_info")
25
+ print "Scraping user #{username}... (attempt #{count + 1})\n"
26
+ begin
27
+ login
28
+
29
+ graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql", "data,user,media_count")
30
+ graphql_script = graphql_script.first if graphql_script.class == Array
31
+
32
+ if graphql_script.nil?
33
+ graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "web_profile_info")
34
+ end
35
+ rescue Zorki::ContentUnavailableError => e
36
+ count += 1
37
+ page.driver.browser.navigate.to("https://www.instagram.com") # we want to go back to the main page so we start from scratch
38
+ sleep 10
39
+ next
40
+ end
41
+
42
+ break
26
43
  end
27
44
 
28
45
  if graphql_script.has_key?("author") && !graphql_script["author"].nil?
@@ -36,10 +53,14 @@ module Zorki
36
53
  ["https://schema.org/FilmAction", "http://schema.org/WriteAction"].include?(stat["interactionType"])
37
54
  end.first
38
55
 
56
+ # number_of_posts = graphql_script["data"]["user"]["media_count"] if number_of_posts.nil?
57
+
39
58
  number_of_followers = graphql_script["interactionStatistic"].select do |stat|
40
59
  stat["interactionType"] == "http://schema.org/FollowAction"
41
60
  end.first
42
61
 
62
+ # number_of_followers = graphql_script["data"]["user"]["follower_count"] if number_of_followers.nil?
63
+
43
64
  begin
44
65
  profile_image_url = user["image"]
45
66
  {
@@ -76,6 +97,9 @@ module Zorki
76
97
  profile_image_url: profile_image_url
77
98
  }
78
99
  end
100
+ rescue Zorki::ContentUnavailableError => e
101
+ debugger
102
+ raise Zorki::UserScrapingError.new("Zorki could not find user #{username}", additional_data: { username: username })
79
103
  end
80
104
  end
81
105
  end
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.23"
4
+ VERSION = "0.1.26"
5
5
  end
data/lib/zorki.rb CHANGED
@@ -28,7 +28,18 @@ module Zorki
28
28
  end
29
29
 
30
30
  def to_honeybadger_context
31
- additional_data
31
+ @additional_data
32
+ end
33
+ end
34
+
35
+ class UserScrapingError < Error
36
+ def initialize(msg = "Zorki encountered an error scraping a user", additional_data: {})
37
+ super(msg)
38
+ @additional_data = additional_data
39
+ end
40
+
41
+ def to_honeybadger_context
42
+ @additional_data
32
43
  end
33
44
  end
34
45
 
data/zorki.gemspec CHANGED
@@ -33,7 +33,7 @@ Gem::Specification.new do |spec|
33
33
  spec.add_dependency "apparition" # A Chrome driver for Capybara
34
34
  spec.add_dependency "typhoeus" # For making API requests
35
35
  spec.add_dependency "oj" # A faster JSON parser/loader than stdlib
36
- spec.add_dependency "selenium-webdriver", "~> 4.19" # Webdriver selenium
36
+ spec.add_dependency "selenium-webdriver", "~> 4.24.0" # Webdriver selenium
37
37
  spec.add_dependency "selenium-devtools" # Allow us to intercept requests
38
38
 
39
39
  spec.add_development_dependency "debug"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.23
4
+ version: 0.1.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-05-29 00:00:00.000000000 Z
11
+ date: 2024-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '4.19'
75
+ version: 4.24.0
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '4.19'
82
+ version: 4.24.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: selenium-devtools
85
85
  requirement: !ruby/object:Gem::Requirement