zorki 0.1.20 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e635e294bb6e73e0e57481ba79d23c603bab3ebbd73bbb4939d12f615644098
4
- data.tar.gz: 876ce40814980d04a962c8002662bf7462a269fa7b6885d89c419b1a2a64c8e0
3
+ metadata.gz: '0228694f5aafe38a9856efe3a4f9f042b94aeecbd5cb7ec919326f48c0b8000e'
4
+ data.tar.gz: c54da1aa5b98d3166a047a24022b74356538e7dcb974e032bc5236065c5edd4c
5
5
  SHA512:
6
- metadata.gz: 552de3b78d4bce7c18d6e0df96d3fdb4eb2b1b43e35961a64617e738510be4979453fe496a55e642329f47d2e4fc8ced52ff390263f39c010693eee4e71a9074
7
- data.tar.gz: 07be9124885ef2eedf0b7433385e1a29a1c1fd33c58686530d99b7c9767784df6b1b7343c57c97157fed756c196d33125b35e621da9a7bf4737da1855a05c911
6
+ metadata.gz: 30b2953e778ce0a6e36f221350c6760974de7279c764111cfe2327f3bcc5f11a93d19a4747d20c35260038e07c06bad2c3d2fc704b4878c23a41ba309ae689d9
7
+ data.tar.gz: 99ed979bcc6f4f0758bca49d34580333ba42c35d74ce3ccfe52871acb9665d3841e0d54ed68bb3af2cf1ef366ebb1f3f9ea9136784958778751d7142f09e5b38
@@ -5,6 +5,28 @@ require "typhoeus"
5
5
  module Zorki
6
6
  class PostScraper < Scraper
7
7
  def parse(id)
8
+ count = 0
9
+
10
+ until count == 2
11
+ puts "Retrieving ID #{id}"
12
+
13
+ begin
14
+ result = attempt_parse(id)
15
+ break
16
+ rescue ImageRequestZeroSize
17
+ debugger
18
+ # If the image is zero size, we retry
19
+ puts "Zero sized image found, retrying #{count}"
20
+ count += 1
21
+ end
22
+ end
23
+
24
+ raise ImageRequestZeroSize if count == 5
25
+
26
+ result
27
+ end
28
+
29
+ def attempt_parse(id)
8
30
  # Stuff we need to get from the DOM (implemented is starred):
9
31
  # - User *
10
32
  # - Text *
@@ -172,6 +172,7 @@ module Zorki
172
172
 
173
173
  def login
174
174
  puts "Attempting to login..."
175
+
175
176
  # Reset the sessions so that there's nothing laying around
176
177
  # page.driver.browser.close
177
178
 
@@ -185,7 +186,10 @@ module Zorki
185
186
 
186
187
  # We don't have to login if we already are
187
188
  begin
188
- return if find_field("Search", wait: 10).present?
189
+ if find_field("Search", wait: 10).present?
190
+ puts "Already logged in"
191
+ return
192
+ end
189
193
  rescue Capybara::ElementNotFound; end
190
194
 
191
195
  # Check if we're redirected to a login page, if we aren't we're already logged in
@@ -213,9 +217,10 @@ module Zorki
213
217
  # Sometimes Instagram just... doesn't let you log in
214
218
  raise "Instagram not accessible" if loop_count == 5
215
219
 
220
+ puts "Login successful"
216
221
  # No we don't want to save our login credentials
217
222
  begin
218
- puts "Checking and clearing Save Info button..."
223
+ puts "Checking and clearing Save Info button"
219
224
 
220
225
  find_button("Save Info").click()
221
226
  rescue Capybara::ElementNotFound; end
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.20"
4
+ VERSION = "0.1.21"
5
5
  end
data/lib/zorki.rb CHANGED
@@ -46,11 +46,34 @@ module Zorki
46
46
  end
47
47
  end
48
48
 
49
# Raised by retrieve_media when every download attempt yields a file of 100 bytes or fewer.
+ class ImageRequestZeroSize < RetryableError; end
50
+
49
51
  define_setting :temp_storage_location, "tmp/zorki"
50
52
 
51
53
  # Get an image from a URL and save to a temp folder set in the configuration under
52
54
  # temp_storage_location
55
+
56
+ # We do this because sometimes the images are coming back sized zero
53
57
  def self.retrieve_media(url)
58
+ count = 0
59
+
60
+ until count == 5
61
+ temp_file_name = attempt_retrieve_media(url)
62
+
63
+ # If it's more than 1kb return properly
64
+ return temp_file_name if File.size(temp_file_name) > 100
65
+
66
+ # Delete the file since we want to retry
67
+ debugger
68
+
69
+ File.delete(temp_file_name)
70
+ count += 1
71
+ end
72
+
73
+ raise(ImageRequestZeroSize)
74
+ end
75
+
76
+ def self.attempt_retrieve_media(url)
54
77
  response = Typhoeus.get(url)
55
78
 
56
79
  # Get the file extension if it's in the file
@@ -69,6 +92,7 @@ module Zorki
69
92
  # We do this in case the folder isn't created yet, since it's a temp folder we'll just do so
70
93
  self.create_temp_storage_location
71
94
  File.binwrite(temp_file_name, response.body)
95
+
72
96
  temp_file_name
73
97
  end
74
98
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20
4
+ version: 0.1.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-28 00:00:00.000000000 Z
11
+ date: 2024-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara