scrap_kit 0.1.11 → 0.1.16

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 36f370f43360cdc2725cbb83591738dc46c5949cb995039f45cf1ddaccd091a4
4
- data.tar.gz: b904bef830b3e5cd88892677eed3f0cf97635e2d46277cc69280ca5ee7b6a0a3
3
+ metadata.gz: a3db092bd833a6c977d64468c38c6361922498683606f3ca543a3c7fdb5a91a1
4
+ data.tar.gz: 865ca828a23e0c2ce43f9070ce85e199979774964d10018881befd504925608f
5
5
  SHA512:
6
- metadata.gz: e0c1321ef88bf5be53603e41e0826676d9bc50bb741b4d45d8cc79ff42bc6e0360d989c3deb097d8c784b9c795495898a39cf7ff4a41f36c62f57d0f030a8915
7
- data.tar.gz: b668a193cf260b94239d1d1e016e9eb4b2d8e3de43aaf604d6d9fb255b5147295e2079e5da7b3b3dbb6177ea45ac413ce73467f11ba73d0f2495fa91b3196af4
6
+ metadata.gz: 0e25dc4878540dd64d4d436a07d70f250ef4c6451f2586758324ad819d256af190c7e8f72a3b113722911649485cdf2a9793186e607fa3ff159ee992b424d6f5
7
+ data.tar.gz: 2de4cd58ddc9f46f1a9d198d95163900bf1394e87ed9733a444693a1775f0d59a72793212fd70d2a3656005d5739f730109e33375733e28146f2ee3bedeecaa2
@@ -1,5 +1,30 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.1.16] 2020-11-20
4
+
5
+ ### Changed/Added
6
+ - Return `src` value for `img` elements
7
+
8
+ ## [0.1.15] 2020-10-25
9
+
10
+ ### Changed/Added
11
+ - Add `--disable-dev-shm-usage` arguments to Chrome driver
12
+
13
+ ## [0.1.14] 2020-10-10
14
+
15
+ ### Changed/Added
16
+ - Increase timeout for Chrome driver
17
+
18
+ ## [0.1.13] 2020-09-04
19
+
20
+ ### Changed/Added
21
+ - Return nil if element doesn't exist for `extract_value_from_element`
22
+
23
+ ## [0.1.12] 2020-09-03
24
+
25
+ ### Changed/Added
26
+ - Return nil if `extract_attribute` fails
27
+
3
28
  ## [0.1.11] 2020-09-03
4
29
 
5
30
  ### Changed/Added
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scrap_kit (0.1.11)
4
+ scrap_kit (0.1.16)
5
5
  activesupport (~> 6.0)
6
6
  watir (~> 6.16.5)
7
7
  webdrivers (~> 4.0)
@@ -9,7 +9,7 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- activesupport (6.0.3.2)
12
+ activesupport (6.0.3.4)
13
13
  concurrent-ruby (~> 1.0, >= 1.0.2)
14
14
  i18n (>= 0.7, < 2)
15
15
  minitest (~> 5.1)
@@ -25,7 +25,7 @@ GEM
25
25
  nokogiri (1.10.10)
26
26
  mini_portile2 (~> 2.4.0)
27
27
  rake (13.0.1)
28
- regexp_parser (1.7.1)
28
+ regexp_parser (1.8.2)
29
29
  rspec (3.9.0)
30
30
  rspec-core (~> 3.9.0)
31
31
  rspec-expectations (~> 3.9.0)
@@ -44,7 +44,7 @@ GEM
44
44
  childprocess (>= 0.5, < 4.0)
45
45
  rubyzip (>= 1.2.2)
46
46
  thread_safe (0.3.6)
47
- tzinfo (1.2.7)
47
+ tzinfo (1.2.8)
48
48
  thread_safe (~> 0.1)
49
49
  watir (6.16.5)
50
50
  regexp_parser (~> 1.2)
@@ -53,7 +53,7 @@ GEM
53
53
  nokogiri (~> 1.6)
54
54
  rubyzip (>= 1.3.0)
55
55
  selenium-webdriver (>= 3.0, < 4.0)
56
- zeitwerk (2.4.0)
56
+ zeitwerk (2.4.1)
57
57
 
58
58
  PLATFORMS
59
59
  ruby
@@ -4,7 +4,7 @@ require "watir"
4
4
 
5
5
  module ScrapKit
6
6
  class Recipe
7
- attr_accessor :user_agent
7
+ attr_accessor :user_agent, :browser
8
8
 
9
9
  class << self
10
10
  def load(source)
@@ -91,9 +91,13 @@ module ScrapKit
91
91
  end
92
92
 
93
93
  def extract_value_from_element(element)
94
+ return nil unless element.exists?
95
+
94
96
  if element&.respond_to?(:tag_name)
95
97
  if element.tag_name.downcase == "input"
96
98
  return element.attribute_value(:value)
99
+ elsif element.tag_name.downcase == "img"
100
+ return element.attribute_value(:src)
97
101
  end
98
102
  end
99
103
 
@@ -141,6 +145,8 @@ module ScrapKit
141
145
  end
142
146
  end
143
147
  end
148
+ rescue
149
+ nil
144
150
  end
145
151
 
146
152
  private
@@ -191,7 +197,7 @@ module ScrapKit
191
197
  end
192
198
  end
193
199
 
194
- sleep 0.25
200
+ sleep 1
195
201
  @browser.wait_until do
196
202
  @browser.ready_state == "complete"
197
203
  end
@@ -207,10 +213,14 @@ module ScrapKit
207
213
 
208
214
  if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
209
215
  options.add_argument "--no-sandbox"
216
+ options.add_argument "--disable-dev-shm-usage"
210
217
  options.binary = chrome_bin
211
218
  end
212
219
 
213
- Watir::Browser.new(:chrome, options: options)
220
+ new_browser = Watir::Browser.new(:chrome, options: options)
221
+ new_browser.driver.manage.timeouts.page_load = 120
222
+
223
+ new_browser
214
224
  end
215
225
  end
216
226
  end
@@ -1,3 +1,3 @@
1
1
  module ScrapKit
2
- VERSION = "0.1.11"
2
+ VERSION = "0.1.16"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrap_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gustavo Leon
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-09-04 00:00:00.000000000 Z
11
+ date: 2020-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -142,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
142
  - !ruby/object:Gem::Version
143
143
  version: '0'
144
144
  requirements: []
145
- rubygems_version: 3.1.2
145
+ rubygems_version: 3.1.4
146
146
  signing_key:
147
147
  specification_version: 4
148
148
  summary: Scrap web sites using recipes.