scrap_kit 0.1.9 → 0.1.14

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ab218fee7bbf85145afd031bbe59a0820b93eb1e57ee27d50163aa25dbcb8e46
4
- data.tar.gz: 4d304049b137f2750b3843c80ff925d872576459627bebac443e395d5e897749
3
+ metadata.gz: d387636088d4461f9f26bcc312f9f634d20fb24ce8d0e7eff9672dfbd87664ba
4
+ data.tar.gz: fe5e85497ba66a7370a4c9b4017058ad5f9d938ff2a4add96aba2888e6f6f6ca
5
5
  SHA512:
6
- metadata.gz: 5e2dd47d6b77ac3983efeb99e351a5e26b2737c1aa0c77972213a744ce44c6033dc2ae3498aca40f68471010b33b6fc3aeb1bf03f922ffa6e95aa0d715493722
7
- data.tar.gz: bc92e505d8ffca95d621ad8756350ae174bc316efbd015724d87478364c9d500682a4da765595c9158bc56ed3fd855e9827dc4f357fbea64901fdb53665c32b1
6
+ metadata.gz: fe464311773e564ee88ce8dab45478445a6291cc6a3c2151b2d45a819e8205fea168b15127fa61fab261042a30ddb8f605f3763629f58df528afa40c5853a324
7
+ data.tar.gz: 43d4c05f94cbff4daa91c22a0d0010dbd857abcd95ee73f1f501288733d33d3c5813b4b84c6c2e09b2a62a5efadd6808d5b7a2de7c012629899ac8b966af1a71
@@ -1,5 +1,30 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.1.14] 2020-10-10
4
+
5
+ ### Changed/Added
6
+ - Increase timeout for Chrome driver
7
+
8
+ ## [0.1.13] 2020-09-04
9
+
10
+ ### Changed/Added
11
+ - Return nil if element doesn't exist for `extract_value_from_element`
12
+
13
+ ## [0.1.12] 2020-09-03
14
+
15
+ ### Changed/Added
16
+ - Return nil if `extract_attribute` fails
17
+
18
+ ## [0.1.11] 2020-09-03
19
+
20
+ ### Changed/Added
21
+ - Add `user_agent` accessor for browser
22
+
23
+ ## [0.1.10] 2020-09-03
24
+
25
+ ### Changed/Added
26
+ - Map attributes to JavaScript calls
27
+
3
28
  ## [0.1.9] 2020-08-31
4
29
 
5
30
  ### Changed/Added
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scrap_kit (0.1.9)
4
+ scrap_kit (0.1.14)
5
5
  activesupport (~> 6.0)
6
6
  watir (~> 6.16.5)
7
7
  webdrivers (~> 4.0)
@@ -9,7 +9,7 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- activesupport (6.0.3.2)
12
+ activesupport (6.0.3.4)
13
13
  concurrent-ruby (~> 1.0, >= 1.0.2)
14
14
  i18n (>= 0.7, < 2)
15
15
  minitest (~> 5.1)
@@ -21,11 +21,11 @@ GEM
21
21
  i18n (1.8.5)
22
22
  concurrent-ruby (~> 1.0)
23
23
  mini_portile2 (2.4.0)
24
- minitest (5.14.1)
24
+ minitest (5.14.2)
25
25
  nokogiri (1.10.10)
26
26
  mini_portile2 (~> 2.4.0)
27
27
  rake (13.0.1)
28
- regexp_parser (1.7.1)
28
+ regexp_parser (1.8.1)
29
29
  rspec (3.9.0)
30
30
  rspec-core (~> 3.9.0)
31
31
  rspec-expectations (~> 3.9.0)
@@ -4,6 +4,8 @@ require "watir"
4
4
 
5
5
  module ScrapKit
6
6
  class Recipe
7
+ attr_accessor :user_agent, :browser
8
+
7
9
  class << self
8
10
  def load(source)
9
11
  input = if source.is_a?(Hash)
@@ -89,6 +91,8 @@ module ScrapKit
89
91
  end
90
92
 
91
93
  def extract_value_from_element(element)
94
+ return nil unless element.exists?
95
+
92
96
  if element&.respond_to?(:tag_name)
93
97
  if element.tag_name.downcase == "input"
94
98
  return element.attribute_value(:value)
@@ -125,6 +129,8 @@ module ScrapKit
125
129
 
126
130
  output
127
131
  end
132
+ elsif selector_or_object[:javascript]
133
+ @browser.execute_script(selector_or_object[:javascript])
128
134
  else
129
135
  found_elements = elements_from_selector(browser_or_element, selector_or_object)
130
136
 
@@ -137,6 +143,8 @@ module ScrapKit
137
143
  end
138
144
  end
139
145
  end
146
+ rescue
147
+ nil
140
148
  end
141
149
 
142
150
  private
@@ -187,7 +195,7 @@ module ScrapKit
187
195
  end
188
196
  end
189
197
 
190
- sleep 0.25
198
+ sleep 1
191
199
  @browser.wait_until do
192
200
  @browser.ready_state == "complete"
193
201
  end
@@ -199,13 +207,17 @@ module ScrapKit
199
207
  options.add_argument "--headless"
200
208
  options.add_argument "--window-size=1080x720"
201
209
  options.add_argument "--hide-scrollbars"
210
+ options.add_argument "--user-agent=#{@user_agent}" if @user_agent
202
211
 
203
212
  if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
204
213
  options.add_argument "--no-sandbox"
205
214
  options.binary = chrome_bin
206
215
  end
207
216
 
208
- Watir::Browser.new(:chrome, options: options)
217
+ new_browser = Watir::Browser.new(:chrome, options: options)
218
+ new_browser.driver.manage.timeouts.page_load = 120
219
+
220
+ new_browser
209
221
  end
210
222
  end
211
223
  end
@@ -1,3 +1,3 @@
1
1
  module ScrapKit
2
- VERSION = "0.1.9"
2
+ VERSION = "0.1.14"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrap_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gustavo Leon
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-31 00:00:00.000000000 Z
11
+ date: 2020-10-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler