zorki 0.1.9 → 0.1.21

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1eadd7ea03af064623b2e0a3deebc32bfa424ac60aab4713b61e3db94c10ee66
4
- data.tar.gz: c4cd5d7bb8148eb23fb7d5b80f08f9118a9b39f717ca395a38b1abca34a7ce70
3
+ metadata.gz: '0228694f5aafe38a9856efe3a4f9f042b94aeecbd5cb7ec919326f48c0b8000e'
4
+ data.tar.gz: c54da1aa5b98d3166a047a24022b74356538e7dcb974e032bc5236065c5edd4c
5
5
  SHA512:
6
- metadata.gz: dcfc9a4129cf62063c34461cfb1ee119ee702a64ed892b10909ddacb4cae544e7875eed7a53355db60c67f590292be951cd515d516b75456aae281a29ea3cbc5
7
- data.tar.gz: feb1ca4d372217487c02311241ae9c4379eacc82336cc91e48c7f80ba68bef3538a14b0f17fb77e254d68203c77d4dcba9559b832c6dd9eda4879426a0e19c83
6
+ metadata.gz: 30b2953e778ce0a6e36f221350c6760974de7279c764111cfe2327f3bcc5f11a93d19a4747d20c35260038e07c06bad2c3d2fc704b4878c23a41ba309ae689d9
7
+ data.tar.gz: 99ed979bcc6f4f0758bca49d34580333ba42c35d74ce3ccfe52871acb9665d3841e0d54ed68bb3af2cf1ef366ebb1f3f9ea9136784958778751d7142f09e5b38
data/.gitignore CHANGED
@@ -10,3 +10,5 @@
10
10
  .byebug_history
11
11
 
12
12
  selenium-server-*
13
+
14
+ zorki-*.gem
data/Gemfile.lock CHANGED
@@ -1,33 +1,18 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- zorki (0.1.8)
4
+ zorki (0.1.9)
5
5
  apparition
6
6
  capybara
7
7
  oj
8
8
  selenium-devtools
9
- selenium-webdriver
9
+ selenium-webdriver (~> 4.19)
10
10
  typhoeus
11
11
 
12
12
  GEM
13
13
  remote: https://rubygems.org/
14
14
  specs:
15
- actionpack (7.1.0.beta1)
16
- actionview (= 7.1.0.beta1)
17
- activesupport (= 7.1.0.beta1)
18
- nokogiri (>= 1.8.5)
19
- rack (>= 2.2.4)
20
- rack-session (>= 1.0.1)
21
- rack-test (>= 0.6.3)
22
- rails-dom-testing (~> 2.2)
23
- rails-html-sanitizer (~> 1.6)
24
- actionview (7.1.0.beta1)
25
- activesupport (= 7.1.0.beta1)
26
- builder (~> 3.1)
27
- erubi (~> 1.11)
28
- rails-dom-testing (~> 2.2)
29
- rails-html-sanitizer (~> 1.6)
30
- activesupport (7.1.0.beta1)
15
+ activesupport (7.1.3.2)
31
16
  base64
32
17
  bigdecimal
33
18
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -37,125 +22,98 @@ GEM
37
22
  minitest (>= 5.1)
38
23
  mutex_m
39
24
  tzinfo (~> 2.0)
40
- addressable (2.8.5)
25
+ addressable (2.8.6)
41
26
  public_suffix (>= 2.0.2, < 6.0)
42
27
  apparition (0.6.0)
43
28
  capybara (~> 3.13, < 4)
44
29
  websocket-driver (>= 0.6.5)
45
30
  ast (2.4.2)
46
- base64 (0.1.1)
47
- bigdecimal (3.1.4)
48
- builder (3.2.4)
49
- capybara (3.39.2)
31
+ base64 (0.2.0)
32
+ bigdecimal (3.1.7)
33
+ capybara (3.40.0)
50
34
  addressable
51
35
  matrix
52
36
  mini_mime (>= 0.1.3)
53
- nokogiri (~> 1.8)
37
+ nokogiri (~> 1.11)
54
38
  rack (>= 1.6.0)
55
39
  rack-test (>= 0.6.3)
56
40
  regexp_parser (>= 1.5, < 3.0)
57
41
  xpath (~> 3.2)
58
- concurrent-ruby (1.2.2)
42
+ concurrent-ruby (1.2.3)
59
43
  connection_pool (2.4.1)
60
- crass (1.0.6)
61
44
  curb (1.0.5)
62
- debug (1.8.0)
63
- irb (>= 1.5.0)
64
- reline (>= 0.3.1)
45
+ debug (1.9.1)
46
+ irb (~> 1.10)
47
+ reline (>= 0.3.8)
65
48
  dotenv (2.7.6)
66
- drb (2.1.1)
67
- ruby2_keywords
68
- erubi (1.12.0)
49
+ drb (2.2.1)
69
50
  ethon (0.16.0)
70
51
  ffi (>= 1.15.0)
71
- ffi (1.15.5)
72
- i18n (1.14.1)
52
+ ffi (1.16.3)
53
+ i18n (1.14.4)
73
54
  concurrent-ruby (~> 1.0)
74
- io-console (0.6.0)
75
- irb (1.8.1)
55
+ io-console (0.7.2)
56
+ irb (1.12.0)
76
57
  rdoc
77
- reline (>= 0.3.8)
78
- json (2.6.3)
58
+ reline (>= 0.4.2)
59
+ json (2.7.1)
79
60
  language_server-protocol (3.17.0.3)
80
- loofah (2.21.3)
81
- crass (~> 1.0.2)
82
- nokogiri (>= 1.12.0)
83
61
  matrix (0.4.2)
84
62
  mini_mime (1.1.5)
85
- minitest (5.20.0)
86
- mutex_m (0.1.2)
87
- nokogiri (1.15.4-arm64-darwin)
63
+ minitest (5.22.3)
64
+ mutex_m (0.2.0)
65
+ nokogiri (1.16.3-arm64-darwin)
88
66
  racc (~> 1.4)
89
- oj (3.16.1)
90
- parallel (1.23.0)
91
- parser (3.2.2.3)
67
+ oj (3.16.3)
68
+ bigdecimal (>= 3.0)
69
+ parallel (1.24.0)
70
+ parser (3.3.0.5)
92
71
  ast (~> 2.4.1)
93
72
  racc
94
- psych (5.1.0)
73
+ psych (5.1.2)
95
74
  stringio
96
- public_suffix (5.0.3)
97
- racc (1.7.1)
75
+ public_suffix (5.0.4)
76
+ racc (1.7.3)
98
77
  rack (3.0.8)
99
- rack-session (2.0.0)
100
- rack (>= 3.0.0)
101
78
  rack-test (2.1.0)
102
79
  rack (>= 1.3)
103
- rackup (2.1.0)
104
- rack (>= 3)
105
- webrick (~> 1.8)
106
- rails-dom-testing (2.2.0)
107
- activesupport (>= 5.0.0)
108
- minitest
109
- nokogiri (>= 1.6)
110
- rails-html-sanitizer (1.6.0)
111
- loofah (~> 2.21)
112
- nokogiri (~> 1.14)
113
- railties (7.1.0.beta1)
114
- actionpack (= 7.1.0.beta1)
115
- activesupport (= 7.1.0.beta1)
116
- irb
117
- rackup (>= 1.0.0)
118
- rake (>= 12.2)
119
- thor (~> 1.0, >= 1.2.2)
120
- zeitwerk (~> 2.6)
121
80
  rainbow (3.1.1)
122
- rake (13.0.6)
123
- rdoc (6.5.0)
81
+ rake (13.1.0)
82
+ rdoc (6.6.3.1)
124
83
  psych (>= 4.0.0)
125
- regexp_parser (2.8.1)
126
- reline (0.3.8)
84
+ regexp_parser (2.9.0)
85
+ reline (0.5.0)
127
86
  io-console (~> 0.5)
128
87
  rexml (3.2.6)
129
- rubocop (1.56.3)
130
- base64 (~> 0.1.1)
88
+ rubocop (1.62.1)
131
89
  json (~> 2.3)
132
90
  language_server-protocol (>= 3.17.0)
133
91
  parallel (~> 1.10)
134
- parser (>= 3.2.2.3)
92
+ parser (>= 3.3.0.2)
135
93
  rainbow (>= 2.2.2, < 4.0)
136
94
  regexp_parser (>= 1.8, < 3.0)
137
95
  rexml (>= 3.2.5, < 4.0)
138
- rubocop-ast (>= 1.28.1, < 2.0)
96
+ rubocop-ast (>= 1.31.1, < 2.0)
139
97
  ruby-progressbar (~> 1.7)
140
98
  unicode-display_width (>= 2.4.0, < 3.0)
141
- rubocop-ast (1.29.0)
142
- parser (>= 3.2.1.0)
143
- rubocop-md (1.2.0)
99
+ rubocop-ast (1.31.2)
100
+ parser (>= 3.3.0.4)
101
+ rubocop-md (1.2.2)
144
102
  rubocop (>= 1.0)
145
- rubocop-minitest (0.31.1)
146
- rubocop (>= 1.39, < 2.0)
103
+ rubocop-minitest (0.35.0)
104
+ rubocop (>= 1.61, < 2.0)
105
+ rubocop-ast (>= 1.31.1, < 2.0)
147
106
  rubocop-packaging (0.5.2)
148
107
  rubocop (>= 1.33, < 2.0)
149
- rubocop-performance (1.19.0)
150
- rubocop (>= 1.7.0, < 2.0)
151
- rubocop-ast (>= 0.4.0)
108
+ rubocop-performance (1.20.2)
109
+ rubocop (>= 1.48.1, < 2.0)
110
+ rubocop-ast (>= 1.30.0, < 2.0)
152
111
  rubocop-rails (2.19.1)
153
112
  activesupport (>= 4.2.0)
154
113
  rack (>= 1.1)
155
114
  rubocop (>= 1.33.0, < 2.0)
156
- rubocop-rails_config (1.14.1)
157
- railties (>= 5.0)
158
- rubocop (>= 1.48.0)
115
+ rubocop-rails_config (1.16.0)
116
+ rubocop (>= 1.57.0)
159
117
  rubocop-ast (>= 1.26.0)
160
118
  rubocop-md
161
119
  rubocop-minitest (~> 0.22)
@@ -163,29 +121,26 @@ GEM
163
121
  rubocop-performance (~> 1.11)
164
122
  rubocop-rails (~> 2.0)
165
123
  ruby-progressbar (1.13.0)
166
- ruby2_keywords (0.0.5)
167
124
  rubyzip (2.3.2)
168
- selenium-devtools (0.120.0)
125
+ selenium-devtools (0.123.0)
169
126
  selenium-webdriver (~> 4.2)
170
- selenium-webdriver (4.16.0)
127
+ selenium-webdriver (4.19.0)
128
+ base64 (~> 0.2)
171
129
  rexml (~> 3.2, >= 3.2.5)
172
130
  rubyzip (>= 1.2.2, < 3.0)
173
131
  websocket (~> 1.0)
174
- stringio (3.0.8)
175
- thor (1.2.2)
176
- typhoeus (1.4.0)
132
+ stringio (3.1.0)
133
+ typhoeus (1.4.1)
177
134
  ethon (>= 0.9.0)
178
135
  tzinfo (2.0.6)
179
136
  concurrent-ruby (~> 1.0)
180
- unicode-display_width (2.4.2)
181
- webrick (1.8.1)
137
+ unicode-display_width (2.5.0)
182
138
  websocket (1.2.10)
183
139
  websocket-driver (0.7.6)
184
140
  websocket-extensions (>= 0.1.0)
185
141
  websocket-extensions (0.1.5)
186
142
  xpath (3.2.0)
187
143
  nokogiri (~> 1.8)
188
- zeitwerk (2.6.11)
189
144
 
190
145
  PLATFORMS
191
146
  arm64-darwin-22
@@ -5,6 +5,28 @@ require "typhoeus"
5
5
  module Zorki
6
6
  class PostScraper < Scraper
7
7
  def parse(id)
8
+ count = 0
9
+
10
+ until count == 2
11
+ puts "Retrieving ID #{id}"
12
+
13
+ begin
14
+ result = attempt_parse(id)
15
+ break
16
+ rescue ImageRequestZeroSize
17
+ debugger
18
+ # If the image is zero size, we retry
19
+ puts "Zero sized image found, retrying #{count}"
20
+ count += 1
21
+ end
22
+ end
23
+
24
+ raise ImageRequestZeroSize if count == 5
25
+
26
+ result
27
+ end
28
+
29
+ def attempt_parse(id)
8
30
  # Stuff we need to get from the DOM (implemented is starred):
9
31
  # - User *
10
32
  # - Text *
@@ -171,6 +171,8 @@ module Zorki
171
171
  end
172
172
 
173
173
  def login
174
+ puts "Attempting to login..."
175
+
174
176
  # Reset the sessions so that there's nothing laying around
175
177
  # page.driver.browser.close
176
178
 
@@ -184,7 +186,10 @@ module Zorki
184
186
 
185
187
  # We don't have to login if we already are
186
188
  begin
187
- return if find_field("Search", wait: 10).present?
189
+ if find_field("Search", wait: 10).present?
190
+ puts "Already logged in"
191
+ return
192
+ end
188
193
  rescue Capybara::ElementNotFound; end
189
194
 
190
195
  # Check if we're redirected to a login page, if we aren't we're already logged in
@@ -193,24 +198,31 @@ module Zorki
193
198
  # Try to log in
194
199
  loop_count = 0
195
200
  while loop_count < 5 do
201
+ puts "Attempting to fill login field ##{loop_count}"
202
+
196
203
  fill_in("username", with: ENV["INSTAGRAM_USER_NAME"])
197
204
  fill_in("password", with: ENV["INSTAGRAM_PASSWORD"])
198
205
 
199
206
  begin
200
- click_button("Log in", exact_text: true) # Note: "Log in" (lowercase `in`) instead redirects to Facebook's login page
207
+ find_button("Log in").click() # Note: "Log in" (lowercase `in`) should be exact instead, it redirects to Facebook's login page
201
208
  rescue Capybara::ElementNotFound; end # If we can't find it don't break horribly, just keep waiting
202
209
 
203
- break unless has_css?('p[data-testid="login-error-message"', wait: 10)
210
+ break unless has_css?('p[data-testid="login-error-message"', wait: 3)
204
211
  loop_count += 1
205
- sleep(rand * 10.3)
212
+ random_length = rand(1...2)
213
+ puts "Sleeping for #{random_length} seconds"
214
+ sleep(random_length)
206
215
  end
207
216
 
208
217
  # Sometimes Instagram just... doesn't let you log in
209
218
  raise "Instagram not accessible" if loop_count == 5
210
219
 
220
+ puts "Login successful"
211
221
  # No we don't want to save our login credentials
212
222
  begin
213
- click_on("Save Info")
223
+ puts "Checking and clearing Save Info button"
224
+
225
+ find_button("Save Info").click()
214
226
  rescue Capybara::ElementNotFound; end
215
227
  end
216
228
 
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.9"
4
+ VERSION = "0.1.21"
5
5
  end
data/lib/zorki.rb CHANGED
@@ -46,11 +46,34 @@ module Zorki
46
46
  end
47
47
  end
48
48
 
49
+ class ImageRequestZeroSize < RetryableError; end
50
+
49
51
  define_setting :temp_storage_location, "tmp/zorki"
50
52
 
51
53
  # Get an image from a URL and save to a temp folder set in the configuration under
52
54
  # temp_storage_location
55
+
56
+ # We do this because sometimes the images are coming back sized zero
53
57
  def self.retrieve_media(url)
58
+ count = 0
59
+
60
+ until count == 5
61
+ temp_file_name = attempt_retrieve_media(url)
62
+
63
+ # If it's more than 1kb return properly
64
+ return temp_file_name if File.size(temp_file_name) > 100
65
+
66
+ # Delete the file since we want to retry
67
+ debugger
68
+
69
+ File.delete(temp_file_name)
70
+ count += 1
71
+ end
72
+
73
+ raise(ImageRequestZeroSize)
74
+ end
75
+
76
+ def self.attempt_retrieve_media(url)
54
77
  response = Typhoeus.get(url)
55
78
 
56
79
  # Get the file extension if it's in the file
@@ -69,6 +92,7 @@ module Zorki
69
92
  # We do this in case the folder isn't created yet, since it's a temp folder we'll just do so
70
93
  self.create_temp_storage_location
71
94
  File.binwrite(temp_file_name, response.body)
95
+
72
96
  temp_file_name
73
97
  end
74
98
 
data/zorki.gemspec CHANGED
@@ -33,7 +33,7 @@ Gem::Specification.new do |spec|
33
33
  spec.add_dependency "apparition" # A Chrome driver for Capybara
34
34
  spec.add_dependency "typhoeus" # For making API requests
35
35
  spec.add_dependency "oj" # A faster JSON parser/loader than stdlib
36
- spec.add_dependency "selenium-webdriver" # Webdriver selenium
36
+ spec.add_dependency "selenium-webdriver", "~> 4.19" # Webdriver selenium
37
37
  spec.add_dependency "selenium-devtools" # Allow us to intercept requests
38
38
 
39
39
  spec.add_development_dependency "debug"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-27 00:00:00.000000000 Z
11
+ date: 2024-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -70,16 +70,16 @@ dependencies:
70
70
  name: selenium-webdriver
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
75
+ version: '4.19'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: '4.19'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: selenium-devtools
85
85
  requirement: !ruby/object:Gem::Requirement