scrap_kit 0.1.9 → 0.1.14

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ab218fee7bbf85145afd031bbe59a0820b93eb1e57ee27d50163aa25dbcb8e46
4
- data.tar.gz: 4d304049b137f2750b3843c80ff925d872576459627bebac443e395d5e897749
3
+ metadata.gz: d387636088d4461f9f26bcc312f9f634d20fb24ce8d0e7eff9672dfbd87664ba
4
+ data.tar.gz: fe5e85497ba66a7370a4c9b4017058ad5f9d938ff2a4add96aba2888e6f6f6ca
5
5
  SHA512:
6
- metadata.gz: 5e2dd47d6b77ac3983efeb99e351a5e26b2737c1aa0c77972213a744ce44c6033dc2ae3498aca40f68471010b33b6fc3aeb1bf03f922ffa6e95aa0d715493722
7
- data.tar.gz: bc92e505d8ffca95d621ad8756350ae174bc316efbd015724d87478364c9d500682a4da765595c9158bc56ed3fd855e9827dc4f357fbea64901fdb53665c32b1
6
+ metadata.gz: fe464311773e564ee88ce8dab45478445a6291cc6a3c2151b2d45a819e8205fea168b15127fa61fab261042a30ddb8f605f3763629f58df528afa40c5853a324
7
+ data.tar.gz: 43d4c05f94cbff4daa91c22a0d0010dbd857abcd95ee73f1f501288733d33d3c5813b4b84c6c2e09b2a62a5efadd6808d5b7a2de7c012629899ac8b966af1a71
@@ -1,5 +1,30 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.1.14] 2020-10-10
4
+
5
+ ### Changed/Added
6
+ - Increase timeout for Chrome driver
7
+
8
+ ## [0.1.13] 2020-09-04
9
+
10
+ ### Changed/Added
11
+ - Return nil if element doesn't exist for `extract_value_from_element`
12
+
13
+ ## [0.1.12] 2020-09-03
14
+
15
+ ### Changed/Added
16
+ - Return nil if `extract_attribute` fails
17
+
18
+ ## [0.1.11] 2020-09-03
19
+
20
+ ### Changed/Added
21
+ - Add `user_agent` accessor for browser
22
+
23
+ ## [0.1.10] 2020-09-03
24
+
25
+ ### Changed/Added
26
+ - Map attributes to JavaScript calls
27
+
3
28
  ## [0.1.9] 2020-08-31
4
29
 
5
30
  ### Changed/Added
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scrap_kit (0.1.9)
4
+ scrap_kit (0.1.14)
5
5
  activesupport (~> 6.0)
6
6
  watir (~> 6.16.5)
7
7
  webdrivers (~> 4.0)
@@ -9,7 +9,7 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- activesupport (6.0.3.2)
12
+ activesupport (6.0.3.4)
13
13
  concurrent-ruby (~> 1.0, >= 1.0.2)
14
14
  i18n (>= 0.7, < 2)
15
15
  minitest (~> 5.1)
@@ -21,11 +21,11 @@ GEM
21
21
  i18n (1.8.5)
22
22
  concurrent-ruby (~> 1.0)
23
23
  mini_portile2 (2.4.0)
24
- minitest (5.14.1)
24
+ minitest (5.14.2)
25
25
  nokogiri (1.10.10)
26
26
  mini_portile2 (~> 2.4.0)
27
27
  rake (13.0.1)
28
- regexp_parser (1.7.1)
28
+ regexp_parser (1.8.1)
29
29
  rspec (3.9.0)
30
30
  rspec-core (~> 3.9.0)
31
31
  rspec-expectations (~> 3.9.0)
@@ -4,6 +4,8 @@ require "watir"
4
4
 
5
5
  module ScrapKit
6
6
  class Recipe
7
+ attr_accessor :user_agent, :browser
8
+
7
9
  class << self
8
10
  def load(source)
9
11
  input = if source.is_a?(Hash)
@@ -89,6 +91,8 @@ module ScrapKit
89
91
  end
90
92
 
91
93
  def extract_value_from_element(element)
94
+ return nil unless element.exists?
95
+
92
96
  if element&.respond_to?(:tag_name)
93
97
  if element.tag_name.downcase == "input"
94
98
  return element.attribute_value(:value)
@@ -125,6 +129,8 @@ module ScrapKit
125
129
 
126
130
  output
127
131
  end
132
+ elsif selector_or_object[:javascript]
133
+ @browser.execute_script(selector_or_object[:javascript])
128
134
  else
129
135
  found_elements = elements_from_selector(browser_or_element, selector_or_object)
130
136
 
@@ -137,6 +143,8 @@ module ScrapKit
137
143
  end
138
144
  end
139
145
  end
146
+ rescue
147
+ nil
140
148
  end
141
149
 
142
150
  private
@@ -187,7 +195,7 @@ module ScrapKit
187
195
  end
188
196
  end
189
197
 
190
- sleep 0.25
198
+ sleep 1
191
199
  @browser.wait_until do
192
200
  @browser.ready_state == "complete"
193
201
  end
@@ -199,13 +207,17 @@ module ScrapKit
199
207
  options.add_argument "--headless"
200
208
  options.add_argument "--window-size=1080x720"
201
209
  options.add_argument "--hide-scrollbars"
210
+ options.add_argument "--user-agent=#{@user_agent}" if @user_agent
202
211
 
203
212
  if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
204
213
  options.add_argument "--no-sandbox"
205
214
  options.binary = chrome_bin
206
215
  end
207
216
 
208
- Watir::Browser.new(:chrome, options: options)
217
+ new_browser = Watir::Browser.new(:chrome, options: options)
218
+ new_browser.driver.manage.timeouts.page_load = 120
219
+
220
+ new_browser
209
221
  end
210
222
  end
211
223
  end
@@ -1,3 +1,3 @@
1
1
  module ScrapKit
2
- VERSION = "0.1.9"
2
+ VERSION = "0.1.14"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrap_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gustavo Leon
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-31 00:00:00.000000000 Z
11
+ date: 2020-10-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler