zorki 0.1.20 → 0.1.21

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e635e294bb6e73e0e57481ba79d23c603bab3ebbd73bbb4939d12f615644098
4
- data.tar.gz: 876ce40814980d04a962c8002662bf7462a269fa7b6885d89c419b1a2a64c8e0
3
+ metadata.gz: '0228694f5aafe38a9856efe3a4f9f042b94aeecbd5cb7ec919326f48c0b8000e'
4
+ data.tar.gz: c54da1aa5b98d3166a047a24022b74356538e7dcb974e032bc5236065c5edd4c
5
5
  SHA512:
6
- metadata.gz: 552de3b78d4bce7c18d6e0df96d3fdb4eb2b1b43e35961a64617e738510be4979453fe496a55e642329f47d2e4fc8ced52ff390263f39c010693eee4e71a9074
7
- data.tar.gz: 07be9124885ef2eedf0b7433385e1a29a1c1fd33c58686530d99b7c9767784df6b1b7343c57c97157fed756c196d33125b35e621da9a7bf4737da1855a05c911
6
+ metadata.gz: 30b2953e778ce0a6e36f221350c6760974de7279c764111cfe2327f3bcc5f11a93d19a4747d20c35260038e07c06bad2c3d2fc704b4878c23a41ba309ae689d9
7
+ data.tar.gz: 99ed979bcc6f4f0758bca49d34580333ba42c35d74ce3ccfe52871acb9665d3841e0d54ed68bb3af2cf1ef366ebb1f3f9ea9136784958778751d7142f09e5b38
@@ -5,6 +5,28 @@ require "typhoeus"
5
5
  module Zorki
6
6
  class PostScraper < Scraper
7
7
  def parse(id)
8
+ count = 0
9
+
10
+ until count == 2
11
+ puts "Retrieving ID #{id}"
12
+
13
+ begin
14
+ result = attempt_parse(id)
15
+ break
16
+ rescue ImageRequestZeroSize
17
+ debugger
18
+ # If the image is zero size, we retry
19
+ puts "Zero sized image found, retrying #{count}"
20
+ count += 1
21
+ end
22
+ end
23
+
24
+ raise ImageRequestZeroSize if count == 5
25
+
26
+ result
27
+ end
28
+
29
+ def attempt_parse(id)
8
30
  # Stuff we need to get from the DOM (implemented is starred):
9
31
  # - User *
10
32
  # - Text *
@@ -172,6 +172,7 @@ module Zorki
172
172
 
173
173
  def login
174
174
  puts "Attempting to login..."
175
+
175
176
  # Reset the sessions so that there's nothing lying around
176
177
  # page.driver.browser.close
177
178
 
@@ -185,7 +186,10 @@ module Zorki
185
186
 
186
187
  # We don't have to login if we already are
187
188
  begin
188
- return if find_field("Search", wait: 10).present?
189
+ if find_field("Search", wait: 10).present?
190
+ puts "Already logged in"
191
+ return
192
+ end
189
193
  rescue Capybara::ElementNotFound; end
190
194
 
191
195
  # Check if we're redirected to a login page; if we aren't, we're already logged in
@@ -213,9 +217,10 @@ module Zorki
213
217
  # Sometimes Instagram just... doesn't let you log in
214
218
  raise "Instagram not accessible" if loop_count == 5
215
219
 
220
+ puts "Login successful"
216
221
  # No, we don't want to save our login credentials
217
222
  begin
218
- puts "Checking and clearing Save Info button..."
223
+ puts "Checking and clearing Save Info button"
219
224
 
220
225
  find_button("Save Info").click()
221
226
  rescue Capybara::ElementNotFound; end
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.20"
4
+ VERSION = "0.1.21"
5
5
  end
data/lib/zorki.rb CHANGED
@@ -46,11 +46,34 @@ module Zorki
46
46
  end
47
47
  end
48
48
 
49
+ class ImageRequestZeroSize < RetryableError; end
50
+
49
51
  define_setting :temp_storage_location, "tmp/zorki"
50
52
 
51
53
  # Get an image from a URL and save to a temp folder set in the configuration under
52
54
  # temp_storage_location
55
+
56
+ # We retry the download because images sometimes come back zero-sized
53
57
  def self.retrieve_media(url)
58
+ count = 0
59
+
60
+ until count == 5
61
+ temp_file_name = attempt_retrieve_media(url)
62
+
63
+ # If it's larger than 100 bytes, treat it as a valid download and return it
64
+ return temp_file_name if File.size(temp_file_name) > 100
65
+
66
+ # Delete the file since we want to retry
67
+ debugger
68
+
69
+ File.delete(temp_file_name)
70
+ count += 1
71
+ end
72
+
73
+ raise(ImageRequestZeroSize)
74
+ end
75
+
76
+ def self.attempt_retrieve_media(url)
54
77
  response = Typhoeus.get(url)
55
78
 
56
79
  # Get the file extension if it's in the file
@@ -69,6 +92,7 @@ module Zorki
69
92
  # Create the folder in case it doesn't exist yet; since it's a temp folder we just create it on demand
70
93
  self.create_temp_storage_location
71
94
  File.binwrite(temp_file_name, response.body)
95
+
72
96
  temp_file_name
73
97
  end
74
98
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20
4
+ version: 0.1.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-28 00:00:00.000000000 Z
11
+ date: 2024-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara