scrap_kit 0.1.10 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 28049015b73f5b5508d952f54fd9d5a326e3377043be78776ad925e963161ffa
4
- data.tar.gz: ebf4112f8a71bc4fe6ba98d6bc02a417f3cd1269a493f59657164945747363d2
3
+ metadata.gz: 96b093d466ebf16f066524ee5ffe6d462fa8b6f5fb0cf4839e58ccb4ea0a64e7
4
+ data.tar.gz: b720216ab63263c2d28842285d2fd1a7aab6e0634e51098e9de9a2e8d471e1bb
5
5
  SHA512:
6
- metadata.gz: 50bbf6756482a1d3a94ea5994efa9921049405bfd3d5486ab854d0478adaa0d4c7e891e645ee9c9d774398cacacb26a6814b34efdf858be6ab14a7765da2c9b2
7
- data.tar.gz: 8457382de09844d4fc469c4045e1e916335a1b3152bfd3b5bd0b0c464b590bc306d1fa7f8d66e0c3c40abe490d0b35aed2659223a6fcb0d81ec9c2efa94498d9
6
+ metadata.gz: 354f88e8f875eca5a45ef557501005752e5ca8e1abbb4bd6933f7640ddf9f64a31f33d766eaa9e4b154da59cf6a53cb5a065795e597b3b213ea6c7610260da6d
7
+ data.tar.gz: cefdf23aa7b1c104992c125e40deb9637269f1c8f29e56c2e9bfbee98e9f59000a8a4c3f7c6a5c710a6e2dc3cc0a8208de5ab863bfaf59533a4095cf9721a4a3
@@ -1,5 +1,30 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.1.15] 2020-10-25
4
+
5
+ ### Changed/Added
6
+ - Add `--disable-dev-shm-usage` argument to Chrome driver
7
+
8
+ ## [0.1.14] 2020-10-10
9
+
10
+ ### Changed/Added
11
+ - Increase timeout for Chrome driver
12
+
13
+ ## [0.1.13] 2020-09-04
14
+
15
+ ### Changed/Added
16
+ - Return nil if element doesn't exist for `extract_value_from_element`
17
+
18
+ ## [0.1.12] 2020-09-03
19
+
20
+ ### Changed/Added
21
+ - Return nil if `extract_attribute` fails
22
+
23
+ ## [0.1.11] 2020-09-03
24
+
25
+ ### Changed/Added
26
+ - Add `user_agent` accessor for browser
27
+
3
28
  ## [0.1.10] 2020-09-03
4
29
 
5
30
  ### Changed/Added
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scrap_kit (0.1.10)
4
+ scrap_kit (0.1.15)
5
5
  activesupport (~> 6.0)
6
6
  watir (~> 6.16.5)
7
7
  webdrivers (~> 4.0)
@@ -9,7 +9,7 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- activesupport (6.0.3.2)
12
+ activesupport (6.0.3.4)
13
13
  concurrent-ruby (~> 1.0, >= 1.0.2)
14
14
  i18n (>= 0.7, < 2)
15
15
  minitest (~> 5.1)
@@ -25,7 +25,7 @@ GEM
25
25
  nokogiri (1.10.10)
26
26
  mini_portile2 (~> 2.4.0)
27
27
  rake (13.0.1)
28
- regexp_parser (1.7.1)
28
+ regexp_parser (1.8.2)
29
29
  rspec (3.9.0)
30
30
  rspec-core (~> 3.9.0)
31
31
  rspec-expectations (~> 3.9.0)
@@ -4,6 +4,8 @@ require "watir"
4
4
 
5
5
  module ScrapKit
6
6
  class Recipe
7
+ attr_accessor :user_agent, :browser
8
+
7
9
  class << self
8
10
  def load(source)
9
11
  input = if source.is_a?(Hash)
@@ -89,6 +91,8 @@ module ScrapKit
89
91
  end
90
92
 
91
93
  def extract_value_from_element(element)
94
+ return nil unless element.exists?
95
+
92
96
  if element&.respond_to?(:tag_name)
93
97
  if element.tag_name.downcase == "input"
94
98
  return element.attribute_value(:value)
@@ -139,6 +143,8 @@ module ScrapKit
139
143
  end
140
144
  end
141
145
  end
146
+ rescue
147
+ nil
142
148
  end
143
149
 
144
150
  private
@@ -189,7 +195,7 @@ module ScrapKit
189
195
  end
190
196
  end
191
197
 
192
- sleep 0.25
198
+ sleep 1
193
199
  @browser.wait_until do
194
200
  @browser.ready_state == "complete"
195
201
  end
@@ -201,13 +207,18 @@ module ScrapKit
201
207
  options.add_argument "--headless"
202
208
  options.add_argument "--window-size=1080x720"
203
209
  options.add_argument "--hide-scrollbars"
210
+ options.add_argument "--user-agent=#{@user_agent}" if @user_agent
204
211
 
205
212
  if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
206
213
  options.add_argument "--no-sandbox"
214
+ options.add_argument "--disable-dev-shm-usage"
207
215
  options.binary = chrome_bin
208
216
  end
209
217
 
210
- Watir::Browser.new(:chrome, options: options)
218
+ new_browser = Watir::Browser.new(:chrome, options: options)
219
+ new_browser.driver.manage.timeouts.page_load = 120
220
+
221
+ new_browser
211
222
  end
212
223
  end
213
224
  end
@@ -1,3 +1,3 @@
1
1
  module ScrapKit
2
- VERSION = "0.1.10"
2
+ VERSION = "0.1.15"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrap_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.10
4
+ version: 0.1.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gustavo Leon
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-09-04 00:00:00.000000000 Z
11
+ date: 2020-10-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -142,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
142
  - !ruby/object:Gem::Version
143
143
  version: '0'
144
144
  requirements: []
145
- rubygems_version: 3.1.2
145
+ rubygems_version: 3.1.4
146
146
  signing_key:
147
147
  specification_version: 4
148
148
  summary: Scrap web sites using recipes.