scrap_kit 0.1.11 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 36f370f43360cdc2725cbb83591738dc46c5949cb995039f45cf1ddaccd091a4
4
- data.tar.gz: b904bef830b3e5cd88892677eed3f0cf97635e2d46277cc69280ca5ee7b6a0a3
3
+ metadata.gz: a3db092bd833a6c977d64468c38c6361922498683606f3ca543a3c7fdb5a91a1
4
+ data.tar.gz: 865ca828a23e0c2ce43f9070ce85e199979774964d10018881befd504925608f
5
5
  SHA512:
6
- metadata.gz: e0c1321ef88bf5be53603e41e0826676d9bc50bb741b4d45d8cc79ff42bc6e0360d989c3deb097d8c784b9c795495898a39cf7ff4a41f36c62f57d0f030a8915
7
- data.tar.gz: b668a193cf260b94239d1d1e016e9eb4b2d8e3de43aaf604d6d9fb255b5147295e2079e5da7b3b3dbb6177ea45ac413ce73467f11ba73d0f2495fa91b3196af4
6
+ metadata.gz: 0e25dc4878540dd64d4d436a07d70f250ef4c6451f2586758324ad819d256af190c7e8f72a3b113722911649485cdf2a9793186e607fa3ff159ee992b424d6f5
7
+ data.tar.gz: 2de4cd58ddc9f46f1a9d198d95163900bf1394e87ed9733a444693a1775f0d59a72793212fd70d2a3656005d5739f730109e33375733e28146f2ee3bedeecaa2
@@ -1,5 +1,30 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.1.16] 2020-11-20
4
+
5
+ ### Changed/Added
6
+ - Return `src` value for `img` elements
7
+
8
+ ## [0.1.15] 2020-10-25
9
+
10
+ ### Changed/Added
11
+ - Add `--disable-dev-shm-usage` argument to Chrome driver
12
+
13
+ ## [0.1.14] 2020-10-10
14
+
15
+ ### Changed/Added
16
+ - Increase timeout for Chrome driver
17
+
18
+ ## [0.1.13] 2020-09-04
19
+
20
+ ### Changed/Added
21
+ - Return nil if element doesn't exist for `extract_value_from_element`
22
+
23
+ ## [0.1.12] 2020-09-03
24
+
25
+ ### Changed/Added
26
+ - Return nil if `extract_attribute` fails
27
+
3
28
  ## [0.1.11] 2020-09-03
4
29
 
5
30
  ### Changed/Added
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scrap_kit (0.1.11)
4
+ scrap_kit (0.1.16)
5
5
  activesupport (~> 6.0)
6
6
  watir (~> 6.16.5)
7
7
  webdrivers (~> 4.0)
@@ -9,7 +9,7 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- activesupport (6.0.3.2)
12
+ activesupport (6.0.3.4)
13
13
  concurrent-ruby (~> 1.0, >= 1.0.2)
14
14
  i18n (>= 0.7, < 2)
15
15
  minitest (~> 5.1)
@@ -25,7 +25,7 @@ GEM
25
25
  nokogiri (1.10.10)
26
26
  mini_portile2 (~> 2.4.0)
27
27
  rake (13.0.1)
28
- regexp_parser (1.7.1)
28
+ regexp_parser (1.8.2)
29
29
  rspec (3.9.0)
30
30
  rspec-core (~> 3.9.0)
31
31
  rspec-expectations (~> 3.9.0)
@@ -44,7 +44,7 @@ GEM
44
44
  childprocess (>= 0.5, < 4.0)
45
45
  rubyzip (>= 1.2.2)
46
46
  thread_safe (0.3.6)
47
- tzinfo (1.2.7)
47
+ tzinfo (1.2.8)
48
48
  thread_safe (~> 0.1)
49
49
  watir (6.16.5)
50
50
  regexp_parser (~> 1.2)
@@ -53,7 +53,7 @@ GEM
53
53
  nokogiri (~> 1.6)
54
54
  rubyzip (>= 1.3.0)
55
55
  selenium-webdriver (>= 3.0, < 4.0)
56
- zeitwerk (2.4.0)
56
+ zeitwerk (2.4.1)
57
57
 
58
58
  PLATFORMS
59
59
  ruby
@@ -4,7 +4,7 @@ require "watir"
4
4
 
5
5
  module ScrapKit
6
6
  class Recipe
7
- attr_accessor :user_agent
7
+ attr_accessor :user_agent, :browser
8
8
 
9
9
  class << self
10
10
  def load(source)
@@ -91,9 +91,13 @@ module ScrapKit
91
91
  end
92
92
 
93
93
  def extract_value_from_element(element)
94
+ return nil unless element.exists?
95
+
94
96
  if element&.respond_to?(:tag_name)
95
97
  if element.tag_name.downcase == "input"
96
98
  return element.attribute_value(:value)
99
+ elsif element.tag_name.downcase == "img"
100
+ return element.attribute_value(:src)
97
101
  end
98
102
  end
99
103
 
@@ -141,6 +145,8 @@ module ScrapKit
141
145
  end
142
146
  end
143
147
  end
148
+ rescue
149
+ nil
144
150
  end
145
151
 
146
152
  private
@@ -191,7 +197,7 @@ module ScrapKit
191
197
  end
192
198
  end
193
199
 
194
- sleep 0.25
200
+ sleep 1
195
201
  @browser.wait_until do
196
202
  @browser.ready_state == "complete"
197
203
  end
@@ -207,10 +213,14 @@ module ScrapKit
207
213
 
208
214
  if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
209
215
  options.add_argument "--no-sandbox"
216
+ options.add_argument "--disable-dev-shm-usage"
210
217
  options.binary = chrome_bin
211
218
  end
212
219
 
213
- Watir::Browser.new(:chrome, options: options)
220
+ new_browser = Watir::Browser.new(:chrome, options: options)
221
+ new_browser.driver.manage.timeouts.page_load = 120
222
+
223
+ new_browser
214
224
  end
215
225
  end
216
226
  end
@@ -1,3 +1,3 @@
1
1
  module ScrapKit
2
- VERSION = "0.1.11"
2
+ VERSION = "0.1.16"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrap_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gustavo Leon
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-09-04 00:00:00.000000000 Z
11
+ date: 2020-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -142,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
142
  - !ruby/object:Gem::Version
143
143
  version: '0'
144
144
  requirements: []
145
- rubygems_version: 3.1.2
145
+ rubygems_version: 3.1.4
146
146
  signing_key:
147
147
  specification_version: 4
148
148
  summary: Scrap web sites using recipes.