zorki 0.1.7 → 0.1.20

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 469ec1ebba03ba1d24f78b28bbd6f1b939b94db49b1e61e6e47647e129ed64c1
4
- data.tar.gz: 96655c2182ac16467e581be7965f5588d59f4ea0c181085d5a5c1f7575882b02
3
+ metadata.gz: 1e635e294bb6e73e0e57481ba79d23c603bab3ebbd73bbb4939d12f615644098
4
+ data.tar.gz: 876ce40814980d04a962c8002662bf7462a269fa7b6885d89c419b1a2a64c8e0
5
5
  SHA512:
6
- metadata.gz: ec36d4c67c01d9a1198d3916b28f5b4fbc87f4f084ee732a15d268027d6ad164540ea87686ecd0548145d0a85cb3d0028f74e6ddd1d228d291cae78609b655f4
7
- data.tar.gz: cf1662ba6bef2b0d3cf5eeb2d879efb5f69efa265268ccf4b6b6156b1b6f440a29a4cbaf2c7dd3f76a4b25a3c1f89d1b9fd384c953cea345169ed0061afbfd54
6
+ metadata.gz: 552de3b78d4bce7c18d6e0df96d3fdb4eb2b1b43e35961a64617e738510be4979453fe496a55e642329f47d2e4fc8ced52ff390263f39c010693eee4e71a9074
7
+ data.tar.gz: 07be9124885ef2eedf0b7433385e1a29a1c1fd33c58686530d99b7c9767784df6b1b7343c57c97157fed756c196d33125b35e621da9a7bf4737da1855a05c911
data/.gitignore CHANGED
@@ -10,3 +10,5 @@
10
10
  .byebug_history
11
11
 
12
12
  selenium-server-*
13
+
14
+ zorki-*.gem
data/Gemfile.lock CHANGED
@@ -1,33 +1,18 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- zorki (0.1.6)
4
+ zorki (0.1.9)
5
5
  apparition
6
6
  capybara
7
7
  oj
8
8
  selenium-devtools
9
- selenium-webdriver
9
+ selenium-webdriver (~> 4.19)
10
10
  typhoeus
11
11
 
12
12
  GEM
13
13
  remote: https://rubygems.org/
14
14
  specs:
15
- actionpack (7.1.0.beta1)
16
- actionview (= 7.1.0.beta1)
17
- activesupport (= 7.1.0.beta1)
18
- nokogiri (>= 1.8.5)
19
- rack (>= 2.2.4)
20
- rack-session (>= 1.0.1)
21
- rack-test (>= 0.6.3)
22
- rails-dom-testing (~> 2.2)
23
- rails-html-sanitizer (~> 1.6)
24
- actionview (7.1.0.beta1)
25
- activesupport (= 7.1.0.beta1)
26
- builder (~> 3.1)
27
- erubi (~> 1.11)
28
- rails-dom-testing (~> 2.2)
29
- rails-html-sanitizer (~> 1.6)
30
- activesupport (7.1.0.beta1)
15
+ activesupport (7.1.3.2)
31
16
  base64
32
17
  bigdecimal
33
18
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -37,122 +22,98 @@ GEM
37
22
  minitest (>= 5.1)
38
23
  mutex_m
39
24
  tzinfo (~> 2.0)
40
- addressable (2.8.5)
25
+ addressable (2.8.6)
41
26
  public_suffix (>= 2.0.2, < 6.0)
42
27
  apparition (0.6.0)
43
28
  capybara (~> 3.13, < 4)
44
29
  websocket-driver (>= 0.6.5)
45
30
  ast (2.4.2)
46
- base64 (0.1.1)
47
- bigdecimal (3.1.4)
48
- builder (3.2.4)
49
- capybara (3.39.2)
31
+ base64 (0.2.0)
32
+ bigdecimal (3.1.7)
33
+ capybara (3.40.0)
50
34
  addressable
51
35
  matrix
52
36
  mini_mime (>= 0.1.3)
53
- nokogiri (~> 1.8)
37
+ nokogiri (~> 1.11)
54
38
  rack (>= 1.6.0)
55
39
  rack-test (>= 0.6.3)
56
40
  regexp_parser (>= 1.5, < 3.0)
57
41
  xpath (~> 3.2)
58
- concurrent-ruby (1.2.2)
42
+ concurrent-ruby (1.2.3)
59
43
  connection_pool (2.4.1)
60
- crass (1.0.6)
61
44
  curb (1.0.5)
45
+ debug (1.9.1)
46
+ irb (~> 1.10)
47
+ reline (>= 0.3.8)
62
48
  dotenv (2.7.6)
63
- drb (2.1.1)
64
- ruby2_keywords
65
- erubi (1.12.0)
49
+ drb (2.2.1)
66
50
  ethon (0.16.0)
67
51
  ffi (>= 1.15.0)
68
- ffi (1.15.5)
69
- i18n (1.14.1)
52
+ ffi (1.16.3)
53
+ i18n (1.14.4)
70
54
  concurrent-ruby (~> 1.0)
71
- io-console (0.6.0)
72
- irb (1.8.1)
55
+ io-console (0.7.2)
56
+ irb (1.12.0)
73
57
  rdoc
74
- reline (>= 0.3.8)
75
- json (2.6.3)
58
+ reline (>= 0.4.2)
59
+ json (2.7.1)
76
60
  language_server-protocol (3.17.0.3)
77
- loofah (2.21.3)
78
- crass (~> 1.0.2)
79
- nokogiri (>= 1.12.0)
80
61
  matrix (0.4.2)
81
62
  mini_mime (1.1.5)
82
- minitest (5.20.0)
83
- mutex_m (0.1.2)
84
- nokogiri (1.15.4-arm64-darwin)
63
+ minitest (5.22.3)
64
+ mutex_m (0.2.0)
65
+ nokogiri (1.16.3-arm64-darwin)
85
66
  racc (~> 1.4)
86
- oj (3.16.1)
87
- parallel (1.23.0)
88
- parser (3.2.2.3)
67
+ oj (3.16.3)
68
+ bigdecimal (>= 3.0)
69
+ parallel (1.24.0)
70
+ parser (3.3.0.5)
89
71
  ast (~> 2.4.1)
90
72
  racc
91
- psych (5.1.0)
73
+ psych (5.1.2)
92
74
  stringio
93
- public_suffix (5.0.3)
94
- racc (1.7.1)
75
+ public_suffix (5.0.4)
76
+ racc (1.7.3)
95
77
  rack (3.0.8)
96
- rack-session (2.0.0)
97
- rack (>= 3.0.0)
98
78
  rack-test (2.1.0)
99
79
  rack (>= 1.3)
100
- rackup (2.1.0)
101
- rack (>= 3)
102
- webrick (~> 1.8)
103
- rails-dom-testing (2.2.0)
104
- activesupport (>= 5.0.0)
105
- minitest
106
- nokogiri (>= 1.6)
107
- rails-html-sanitizer (1.6.0)
108
- loofah (~> 2.21)
109
- nokogiri (~> 1.14)
110
- railties (7.1.0.beta1)
111
- actionpack (= 7.1.0.beta1)
112
- activesupport (= 7.1.0.beta1)
113
- irb
114
- rackup (>= 1.0.0)
115
- rake (>= 12.2)
116
- thor (~> 1.0, >= 1.2.2)
117
- zeitwerk (~> 2.6)
118
80
  rainbow (3.1.1)
119
- rake (13.0.6)
120
- rdoc (6.5.0)
81
+ rake (13.1.0)
82
+ rdoc (6.6.3.1)
121
83
  psych (>= 4.0.0)
122
- regexp_parser (2.8.1)
123
- reline (0.3.8)
84
+ regexp_parser (2.9.0)
85
+ reline (0.5.0)
124
86
  io-console (~> 0.5)
125
87
  rexml (3.2.6)
126
- rubocop (1.56.3)
127
- base64 (~> 0.1.1)
88
+ rubocop (1.62.1)
128
89
  json (~> 2.3)
129
90
  language_server-protocol (>= 3.17.0)
130
91
  parallel (~> 1.10)
131
- parser (>= 3.2.2.3)
92
+ parser (>= 3.3.0.2)
132
93
  rainbow (>= 2.2.2, < 4.0)
133
94
  regexp_parser (>= 1.8, < 3.0)
134
95
  rexml (>= 3.2.5, < 4.0)
135
- rubocop-ast (>= 1.28.1, < 2.0)
96
+ rubocop-ast (>= 1.31.1, < 2.0)
136
97
  ruby-progressbar (~> 1.7)
137
98
  unicode-display_width (>= 2.4.0, < 3.0)
138
- rubocop-ast (1.29.0)
139
- parser (>= 3.2.1.0)
140
- rubocop-md (1.2.0)
99
+ rubocop-ast (1.31.2)
100
+ parser (>= 3.3.0.4)
101
+ rubocop-md (1.2.2)
141
102
  rubocop (>= 1.0)
142
- rubocop-minitest (0.31.1)
143
- rubocop (>= 1.39, < 2.0)
103
+ rubocop-minitest (0.35.0)
104
+ rubocop (>= 1.61, < 2.0)
105
+ rubocop-ast (>= 1.31.1, < 2.0)
144
106
  rubocop-packaging (0.5.2)
145
107
  rubocop (>= 1.33, < 2.0)
146
- rubocop-performance (1.19.0)
147
- rubocop (>= 1.7.0, < 2.0)
148
- rubocop-ast (>= 0.4.0)
108
+ rubocop-performance (1.20.2)
109
+ rubocop (>= 1.48.1, < 2.0)
110
+ rubocop-ast (>= 1.30.0, < 2.0)
149
111
  rubocop-rails (2.19.1)
150
112
  activesupport (>= 4.2.0)
151
113
  rack (>= 1.1)
152
114
  rubocop (>= 1.33.0, < 2.0)
153
- rubocop-rails_config (1.14.1)
154
- railties (>= 5.0)
155
- rubocop (>= 1.48.0)
115
+ rubocop-rails_config (1.16.0)
116
+ rubocop (>= 1.57.0)
156
117
  rubocop-ast (>= 1.26.0)
157
118
  rubocop-md
158
119
  rubocop-minitest (~> 0.22)
@@ -160,35 +121,33 @@ GEM
160
121
  rubocop-performance (~> 1.11)
161
122
  rubocop-rails (~> 2.0)
162
123
  ruby-progressbar (1.13.0)
163
- ruby2_keywords (0.0.5)
164
124
  rubyzip (2.3.2)
165
- selenium-devtools (0.119.0)
125
+ selenium-devtools (0.123.0)
166
126
  selenium-webdriver (~> 4.2)
167
- selenium-webdriver (4.15.0)
127
+ selenium-webdriver (4.19.0)
128
+ base64 (~> 0.2)
168
129
  rexml (~> 3.2, >= 3.2.5)
169
130
  rubyzip (>= 1.2.2, < 3.0)
170
131
  websocket (~> 1.0)
171
- stringio (3.0.8)
172
- thor (1.2.2)
173
- typhoeus (1.4.0)
132
+ stringio (3.1.0)
133
+ typhoeus (1.4.1)
174
134
  ethon (>= 0.9.0)
175
135
  tzinfo (2.0.6)
176
136
  concurrent-ruby (~> 1.0)
177
- unicode-display_width (2.4.2)
178
- webrick (1.8.1)
137
+ unicode-display_width (2.5.0)
179
138
  websocket (1.2.10)
180
139
  websocket-driver (0.7.6)
181
140
  websocket-extensions (>= 0.1.0)
182
141
  websocket-extensions (0.1.5)
183
142
  xpath (3.2.0)
184
143
  nokogiri (~> 1.8)
185
- zeitwerk (2.6.11)
186
144
 
187
145
  PLATFORMS
188
146
  arm64-darwin-22
189
147
 
190
148
  DEPENDENCIES
191
149
  curb (~> 1.0, >= 1.0.5)
150
+ debug
192
151
  dotenv (~> 2.7.6)
193
152
  minitest (~> 5.0)
194
153
  rack (= 3.0.8)
@@ -97,7 +97,6 @@ module Zorki
97
97
 
98
98
  # This has to run last since it switches pages
99
99
  user = User.lookup([username]).first
100
- page.quit
101
100
 
102
101
  {
103
102
  images: images,
@@ -114,27 +114,32 @@ module Zorki
114
114
  # e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
115
115
  # end
116
116
 
117
- elements = doc.search("script").map do |element|
118
- element_json = nil
117
+ elements = doc.search("script").filter_map do |element|
118
+ parsed_element_json = nil
119
119
  begin
120
- element_json = JSON.parse(element)
120
+ element_json = JSON.parse(element.text)
121
121
 
122
- element_json = element_json["require"].first.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
123
- rescue StandardError => e
122
+ parsed_element_json = element_json["require"].first.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
123
+ rescue StandardError
124
124
  next
125
125
  end
126
126
 
127
- element_json
128
- end.compact
127
+ parsed_element_json
128
+ end
129
129
 
130
130
  if elements&.empty?
131
- raise ContentUnavailableError
131
+ raise ContentUnavailableError.new("Cannot find anything", additional_data: { page_source: page.driver.browser.page_source, elements: elements })
132
132
  end
133
+
133
134
  return elements
134
135
  end
135
136
 
136
- raise ContentUnavailableError if response_body.nil?
137
+ raise ContentUnavailableError.new("Response body nil") if response_body.nil?
137
138
  Oj.load(response_body)
139
+ ensure
140
+ page.quit
141
+ # TRY THIS TO MAKE SURE CHROME GETS CLOSED?
142
+ # We may also want to not do this and make sure the same browser is reused instead for cookie purposes
138
143
  end
139
144
 
140
145
  private
@@ -166,8 +171,9 @@ module Zorki
166
171
  end
167
172
 
168
173
  def login
174
+ puts "Attempting to login..."
169
175
  # Reset the sessions so that there's nothing laying around
170
- page.quit
176
+ # page.driver.browser.close
171
177
 
172
178
  # Check if we're on a Instagram page already, if not visit it.
173
179
  unless page.driver.browser.current_url.include? "instagram.com"
@@ -188,16 +194,20 @@ module Zorki
188
194
  # Try to log in
189
195
  loop_count = 0
190
196
  while loop_count < 5 do
197
+ puts "Attempting to fill login field ##{loop_count}"
198
+
191
199
  fill_in("username", with: ENV["INSTAGRAM_USER_NAME"])
192
200
  fill_in("password", with: ENV["INSTAGRAM_PASSWORD"])
193
201
 
194
202
  begin
195
- click_button("Log in", exact_text: true) # Note: "Log in" (lowercase `in`) instead redirects to Facebook's login page
203
+ find_button("Log in").click() # Note: "Log in" (lowercase `in`) should be exact instead, it redirects to Facebook's login page
196
204
  rescue Capybara::ElementNotFound; end # If we can't find it don't break horribly, just keep waiting
197
205
 
198
- break unless has_css?('p[data-testid="login-error-message"', wait: 10)
206
+ break unless has_css?('p[data-testid="login-error-message"', wait: 3)
199
207
  loop_count += 1
200
- sleep(rand * 10.3)
208
+ random_length = rand(1...2)
209
+ puts "Sleeping for #{random_length} seconds"
210
+ sleep(random_length)
201
211
  end
202
212
 
203
213
  # Sometimes Instagram just... doesn't let you log in
@@ -205,7 +215,9 @@ module Zorki
205
215
 
206
216
  # No we don't want to save our login credentials
207
217
  begin
208
- click_on("Save Info")
218
+ puts "Checking and clearing Save Info button..."
219
+
220
+ find_button("Save Info").click()
209
221
  rescue Capybara::ElementNotFound; end
210
222
  end
211
223
 
@@ -242,6 +254,15 @@ module Zorki
242
254
  # Multiply everything and insure we get an integer back
243
255
  (number * multiplier).to_i
244
256
  end
257
+
258
+ # def reset_window
259
+ # old_handle = page.driver.browser.window_handle
260
+ # page.driver.browser.switch_to.new_window(:window)
261
+ # new_handle = page.driver.browser.window_handle
262
+ # page.driver.browser.switch_to.window(old_handle)
263
+ # page.driver.browser.close
264
+ # page.driver.browser.switch_to.window(new_handle)
265
+ # end
245
266
  end
246
267
  end
247
268
 
@@ -21,6 +21,10 @@ module Zorki
21
21
  graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "?username=")
22
22
  graphql_script = graphql_script.first if graphql_script.class == Array
23
23
 
24
+ if graphql_script.nil?
25
+ graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "web_profile_info")
26
+ end
27
+
24
28
  if graphql_script.has_key?("author") && !graphql_script["author"].nil?
25
29
  user = graphql_script["author"]
26
30
 
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.7"
4
+ VERSION = "0.1.20"
5
5
  end
data/lib/zorki.rb CHANGED
@@ -20,8 +20,15 @@ module Zorki
20
20
  end
21
21
 
22
22
  class ContentUnavailableError < Error
23
- def initialize(msg = "Zorki could not find content requested")
24
- super
23
+ attr_reader :additional_data
24
+
25
+ def initialize(msg = "Zorki could not find content requested", additional_data: {})
26
+ super(msg)
27
+ @additional_data = additional_data
28
+ end
29
+
30
+ def to_honeybadger_context
31
+ additional_data
25
32
  end
26
33
  end
27
34
 
data/zorki.gemspec CHANGED
@@ -29,15 +29,15 @@ Gem::Specification.new do |spec|
29
29
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
30
30
  spec.require_paths = ["lib"]
31
31
 
32
- # Uncomment to register a new dependency of your gem
33
- # spec.add_dependency "example-gem", "~> 1.0"
34
32
  spec.add_dependency "capybara" # For scraping and running browsers
35
33
  spec.add_dependency "apparition" # A Chrome driver for Capybara
36
34
  spec.add_dependency "typhoeus" # For making API requests
37
35
  spec.add_dependency "oj" # A faster JSON parser/loader than stdlib
38
- spec.add_dependency "selenium-webdriver" # Webdriver selenium
36
+ spec.add_dependency "selenium-webdriver", "~> 4.19" # Webdriver selenium
39
37
  spec.add_dependency "selenium-devtools" # Allow us to intercept requests
40
38
 
39
+ spec.add_development_dependency "debug"
40
+
41
41
  # For more information and examples about making a new gem, checkout our
42
42
  # guide at: https://bundler.io/guides/creating_gem.html
43
43
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-05 00:00:00.000000000 Z
11
+ date: 2024-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -68,6 +68,20 @@ dependencies:
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: selenium-webdriver
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '4.19'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '4.19'
83
+ - !ruby/object:Gem::Dependency
84
+ name: selenium-devtools
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
87
  - - ">="
@@ -81,13 +95,13 @@ dependencies:
81
95
  - !ruby/object:Gem::Version
82
96
  version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
- name: selenium-devtools
98
+ name: debug
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
101
  - - ">="
88
102
  - !ruby/object:Gem::Version
89
103
  version: '0'
90
- type: :runtime
104
+ type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements: