scrap_kit 0.1.8 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9125aec53d6a517aa7679bc67df8c6ffd366d77afea643fae757ab7862767c0d
4
- data.tar.gz: bbfa2cae0560461e7fb1f5af1cb672a0b5c11686e75696b24578861e0d14aa1c
3
+ metadata.gz: b3c0ad11b6a5a21a15c49f4203e2256ebac9d83fa9ed8de4060edd7b49ed6513
4
+ data.tar.gz: 4a9aa63a96b9bfda89f578827a3f7df5f03458d9a8191911d5e2ffa723ac8913
5
5
  SHA512:
6
- metadata.gz: f2f8e6b7cb709ec2db86696adf27ec64e9c5faf2c339248ef34cb31764155d11de7904e1e881b7a21ba933469ebbccee998db35748030544fd699cb41a2cd7e1
7
- data.tar.gz: 79fcdcf90aae48eefdf83ed6e285114221047f007a15e7e5d3f5b357091b37f7f51e0778c6ad03f03caa0fa291666831f02a1459d426e522aef1f6c02b4b509d
6
+ metadata.gz: c21f2fc8782ca5552f92020cbf766f982cbbc1621256a0885340507e74b73d895086343111f65b3a7c6aba23771e345b8e57f962bcb3d1ab77a56392006469e4
7
+ data.tar.gz: bece24176f75eb7113fd5a8b0d1aaf151fa4d61ddd3bd697e9b4e53d2390888ef38fee6f9761ac3c02b8671710b8816b44a8236ef9f02cf547a95eff28796f88
@@ -1,5 +1,30 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.1.13] 2020-09-04
4
+
5
+ ### Changed/Added
6
+ - Return nil if element doesn't exist for `extract_value_from_element`
7
+
8
+ ## [0.1.12] 2020-09-03
9
+
10
+ ### Changed/Added
11
+ - Return nil if `extract_attribute` fails
12
+
13
+ ## [0.1.11] 2020-09-03
14
+
15
+ ### Changed/Added
16
+ - Add `user_agent` accessor for browser
17
+
18
+ ## [0.1.10] 2020-09-03
19
+
20
+ ### Changed/Added
21
+ - Map attributes to JavaScript calls
22
+
23
+ ## [0.1.9] 2020-08-31
24
+
25
+ ### Changed/Added
26
+ - Set arguments for Chrome driver
27
+
3
28
  ## [0.1.8] 2020-08-29
4
29
 
5
30
  ### Changed/Added
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scrap_kit (0.1.8)
4
+ scrap_kit (0.1.13)
5
5
  activesupport (~> 6.0)
6
6
  watir (~> 6.16.5)
7
7
  webdrivers (~> 4.0)
@@ -21,7 +21,7 @@ GEM
21
21
  i18n (1.8.5)
22
22
  concurrent-ruby (~> 1.0)
23
23
  mini_portile2 (2.4.0)
24
- minitest (5.14.1)
24
+ minitest (5.14.2)
25
25
  nokogiri (1.10.10)
26
26
  mini_portile2 (~> 2.4.0)
27
27
  rake (13.0.1)
@@ -4,6 +4,8 @@ require "watir"
4
4
 
5
5
  module ScrapKit
6
6
  class Recipe
7
+ attr_accessor :user_agent
8
+
7
9
  class << self
8
10
  def load(source)
9
11
  input = if source.is_a?(Hash)
@@ -27,7 +29,7 @@ module ScrapKit
27
29
  def run
28
30
  output = {}
29
31
 
30
- @browser = Watir::Browser.new(:chrome, headless: true)
32
+ @browser = create_browser
31
33
  @browser.goto @url
32
34
 
33
35
  @steps.each do |step|
@@ -89,6 +91,8 @@ module ScrapKit
89
91
  end
90
92
 
91
93
  def extract_value_from_element(element)
94
+ return nil unless element.exists?
95
+
92
96
  if element&.respond_to?(:tag_name)
93
97
  if element.tag_name.downcase == "input"
94
98
  return element.attribute_value(:value)
@@ -125,6 +129,8 @@ module ScrapKit
125
129
 
126
130
  output
127
131
  end
132
+ elsif selector_or_object[:javascript]
133
+ @browser.execute_script(selector_or_object[:javascript])
128
134
  else
129
135
  found_elements = elements_from_selector(browser_or_element, selector_or_object)
130
136
 
@@ -137,6 +143,8 @@ module ScrapKit
137
143
  end
138
144
  end
139
145
  end
146
+ rescue
147
+ nil
140
148
  end
141
149
 
142
150
  private
@@ -187,10 +195,26 @@ module ScrapKit
187
195
  end
188
196
  end
189
197
 
190
- sleep 0.25
198
+ sleep 1
191
199
  @browser.wait_until do
192
200
  @browser.ready_state == "complete"
193
201
  end
194
202
  end
203
+
204
+ def create_browser
205
+ options = Selenium::WebDriver::Chrome::Options.new
206
+
207
+ options.add_argument "--headless"
208
+ options.add_argument "--window-size=1080x720"
209
+ options.add_argument "--hide-scrollbars"
210
+ options.add_argument "--user-agent=#{@user_agent}" if @user_agent
211
+
212
+ if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
213
+ options.add_argument "--no-sandbox"
214
+ options.binary = chrome_bin
215
+ end
216
+
217
+ Watir::Browser.new(:chrome, options: options)
218
+ end
195
219
  end
196
220
  end
@@ -1,3 +1,3 @@
1
1
  module ScrapKit
2
- VERSION = "0.1.8"
2
+ VERSION = "0.1.13"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrap_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gustavo Leon
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-30 00:00:00.000000000 Z
11
+ date: 2020-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler