zorki 0.1.20 → 0.1.22

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e635e294bb6e73e0e57481ba79d23c603bab3ebbd73bbb4939d12f615644098
4
- data.tar.gz: 876ce40814980d04a962c8002662bf7462a269fa7b6885d89c419b1a2a64c8e0
3
+ metadata.gz: 8e6710ae1d1737110bc8385113470b9523414c74ff3ed88e0fe507215ddaa24c
4
+ data.tar.gz: 27df7892036e33313229ffefef35a6407667fd3d36275bee9ff933a07178a102
5
5
  SHA512:
6
- metadata.gz: 552de3b78d4bce7c18d6e0df96d3fdb4eb2b1b43e35961a64617e738510be4979453fe496a55e642329f47d2e4fc8ced52ff390263f39c010693eee4e71a9074
7
- data.tar.gz: 07be9124885ef2eedf0b7433385e1a29a1c1fd33c58686530d99b7c9767784df6b1b7343c57c97157fed756c196d33125b35e621da9a7bf4737da1855a05c911
6
+ metadata.gz: 6147d1586fbb95a28a018139e8bcb71a1f5731a3a15c65767ab43d0f29395db98f0a18f13080aeb2cd9eb82c909db9a65931ca6100beabbc08be01c343729e92
7
+ data.tar.gz: 4bac8fc6d924404f1671c302c16e5eb895bef4ab9a63c03b5951b4f4c6f9e6314073468ebb5ad4bad3bda0819afcd096a53160950439e044fb55922f80c460cc
@@ -5,6 +5,27 @@ require "typhoeus"
5
5
  module Zorki
6
6
  class PostScraper < Scraper
7
7
  def parse(id)
8
+ count = 0
9
+
10
+ until count == 2
11
+ puts "Retrieving ID #{id}"
12
+
13
+ begin
14
+ result = attempt_parse(id)
15
+ break
16
+ rescue ImageRequestZeroSize
17
+ # If the image is zero size, we retry
18
+ puts "Zero sized image found, retrying #{count}"
19
+ count += 1
20
+ end
21
+ end
22
+
23
+ raise ImageRequestZeroSize if count == 5
24
+
25
+ result
26
+ end
27
+
28
+ def attempt_parse(id)
8
29
  # Stuff we need to get from the DOM (implemented is starred):
9
30
  # - User *
10
31
  # - Text *
@@ -172,6 +172,7 @@ module Zorki
172
172
 
173
173
  def login
174
174
  puts "Attempting to login..."
175
+
175
176
  # Reset the sessions so that there's nothing laying around
176
177
  # page.driver.browser.close
177
178
 
@@ -185,7 +186,10 @@ module Zorki
185
186
 
186
187
  # We don't have to login if we already are
187
188
  begin
188
- return if find_field("Search", wait: 10).present?
189
+ if find_field("Search", wait: 10).present?
190
+ puts "Already logged in"
191
+ return
192
+ end
189
193
  rescue Capybara::ElementNotFound; end
190
194
 
191
195
  # Check if we're redirected to a login page, if we aren't we're already logged in
@@ -213,9 +217,10 @@ module Zorki
213
217
  # Sometimes Instagram just... doesn't let you log in
214
218
  raise "Instagram not accessible" if loop_count == 5
215
219
 
220
+ puts "Login successful"
216
221
  # No we don't want to save our login credentials
217
222
  begin
218
- puts "Checking and clearing Save Info button..."
223
+ puts "Checking and clearing Save Info button"
219
224
 
220
225
  find_button("Save Info").click()
221
226
  rescue Capybara::ElementNotFound; end
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.20"
4
+ VERSION = "0.1.22"
5
5
  end
data/lib/zorki.rb CHANGED
@@ -46,11 +46,32 @@ module Zorki
46
46
  end
47
47
  end
48
48
 
49
+ class ImageRequestZeroSize < RetryableError; end
50
+
49
51
  define_setting :temp_storage_location, "tmp/zorki"
50
52
 
51
53
  # Get an image from a URL and save to a temp folder set in the configuration under
52
54
  # temp_storage_location
55
+
56
+ # We do this because sometimes the images are coming back sized zero
53
57
  def self.retrieve_media(url)
58
+ count = 0
59
+
60
+ until count == 5
61
+ temp_file_name = attempt_retrieve_media(url)
62
+
63
+ # If it's more than 1kb return properly
64
+ return temp_file_name if File.size(temp_file_name) > 100
65
+
66
+ # Delete the file since we want to retry
67
+ File.delete(temp_file_name)
68
+ count += 1
69
+ end
70
+
71
+ raise(ImageRequestZeroSize)
72
+ end
73
+
74
+ def self.attempt_retrieve_media(url)
54
75
  response = Typhoeus.get(url)
55
76
 
56
77
  # Get the file extension if it's in the file
@@ -69,6 +90,7 @@ module Zorki
69
90
  # We do this in case the folder isn't created yet, since it's a temp folder we'll just do so
70
91
  self.create_temp_storage_location
71
92
  File.binwrite(temp_file_name, response.body)
93
+
72
94
  temp_file_name
73
95
  end
74
96
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20
4
+ version: 0.1.22
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-28 00:00:00.000000000 Z
11
+ date: 2024-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara