scrap_kit 0.1.9 → 0.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Gemfile.lock +4 -4
- data/lib/scrap_kit/recipe.rb +14 -2
- data/lib/scrap_kit/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d387636088d4461f9f26bcc312f9f634d20fb24ce8d0e7eff9672dfbd87664ba
|
4
|
+
data.tar.gz: fe5e85497ba66a7370a4c9b4017058ad5f9d938ff2a4add96aba2888e6f6f6ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe464311773e564ee88ce8dab45478445a6291cc6a3c2151b2d45a819e8205fea168b15127fa61fab261042a30ddb8f605f3763629f58df528afa40c5853a324
|
7
|
+
data.tar.gz: 43d4c05f94cbff4daa91c22a0d0010dbd857abcd95ee73f1f501288733d33d3c5813b4b84c6c2e09b2a62a5efadd6808d5b7a2de7c012629899ac8b966af1a71
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,30 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [0.1.14] 2020-10-10
|
4
|
+
|
5
|
+
### Changed/Added
|
6
|
+
- Increase timeout for Chrome driver
|
7
|
+
|
8
|
+
## [0.1.13] 2020-09-04
|
9
|
+
|
10
|
+
### Changed/Added
|
11
|
+
- Return nil if element doesn't exist for `extract_value_from_element`
|
12
|
+
|
13
|
+
## [0.1.12] 2020-09-03
|
14
|
+
|
15
|
+
### Changed/Added
|
16
|
+
- Return nil if `extract_attribute` fails
|
17
|
+
|
18
|
+
## [0.1.11] 2020-09-03
|
19
|
+
|
20
|
+
### Changed/Added
|
21
|
+
- Add `user_agent` accessor for browser
|
22
|
+
|
23
|
+
## [0.1.10] 2020-09-03
|
24
|
+
|
25
|
+
### Changed/Added
|
26
|
+
- Map attributes to JavaScript calls
|
27
|
+
|
3
28
|
## [0.1.9] 2020-08-31
|
4
29
|
|
5
30
|
### Changed/Added
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
scrap_kit (0.1.9)
|
4
|
+
scrap_kit (0.1.14)
|
5
5
|
activesupport (~> 6.0)
|
6
6
|
watir (~> 6.16.5)
|
7
7
|
webdrivers (~> 4.0)
|
@@ -9,7 +9,7 @@ PATH
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
|
-
activesupport (6.0.3.
|
12
|
+
activesupport (6.0.3.4)
|
13
13
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
14
14
|
i18n (>= 0.7, < 2)
|
15
15
|
minitest (~> 5.1)
|
@@ -21,11 +21,11 @@ GEM
|
|
21
21
|
i18n (1.8.5)
|
22
22
|
concurrent-ruby (~> 1.0)
|
23
23
|
mini_portile2 (2.4.0)
|
24
|
-
minitest (5.14.
|
24
|
+
minitest (5.14.2)
|
25
25
|
nokogiri (1.10.10)
|
26
26
|
mini_portile2 (~> 2.4.0)
|
27
27
|
rake (13.0.1)
|
28
|
-
regexp_parser (1.
|
28
|
+
regexp_parser (1.8.1)
|
29
29
|
rspec (3.9.0)
|
30
30
|
rspec-core (~> 3.9.0)
|
31
31
|
rspec-expectations (~> 3.9.0)
|
data/lib/scrap_kit/recipe.rb
CHANGED
@@ -4,6 +4,8 @@ require "watir"
|
|
4
4
|
|
5
5
|
module ScrapKit
|
6
6
|
class Recipe
|
7
|
+
attr_accessor :user_agent, :browser
|
8
|
+
|
7
9
|
class << self
|
8
10
|
def load(source)
|
9
11
|
input = if source.is_a?(Hash)
|
@@ -89,6 +91,8 @@ module ScrapKit
|
|
89
91
|
end
|
90
92
|
|
91
93
|
def extract_value_from_element(element)
|
94
|
+
return nil unless element.exists?
|
95
|
+
|
92
96
|
if element&.respond_to?(:tag_name)
|
93
97
|
if element.tag_name.downcase == "input"
|
94
98
|
return element.attribute_value(:value)
|
@@ -125,6 +129,8 @@ module ScrapKit
|
|
125
129
|
|
126
130
|
output
|
127
131
|
end
|
132
|
+
elsif selector_or_object[:javascript]
|
133
|
+
@browser.execute_script(selector_or_object[:javascript])
|
128
134
|
else
|
129
135
|
found_elements = elements_from_selector(browser_or_element, selector_or_object)
|
130
136
|
|
@@ -137,6 +143,8 @@ module ScrapKit
|
|
137
143
|
end
|
138
144
|
end
|
139
145
|
end
|
146
|
+
rescue
|
147
|
+
nil
|
140
148
|
end
|
141
149
|
|
142
150
|
private
|
@@ -187,7 +195,7 @@ module ScrapKit
|
|
187
195
|
end
|
188
196
|
end
|
189
197
|
|
190
|
-
sleep
|
198
|
+
sleep 1
|
191
199
|
@browser.wait_until do
|
192
200
|
@browser.ready_state == "complete"
|
193
201
|
end
|
@@ -199,13 +207,17 @@ module ScrapKit
|
|
199
207
|
options.add_argument "--headless"
|
200
208
|
options.add_argument "--window-size=1080x720"
|
201
209
|
options.add_argument "--hide-scrollbars"
|
210
|
+
options.add_argument "--user-agent=#{@user_agent}" if @user_agent
|
202
211
|
|
203
212
|
if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
|
204
213
|
options.add_argument "--no-sandbox"
|
205
214
|
options.binary = chrome_bin
|
206
215
|
end
|
207
216
|
|
208
|
-
Watir::Browser.new(:chrome, options: options)
|
217
|
+
new_browser = Watir::Browser.new(:chrome, options: options)
|
218
|
+
new_browser.driver.manage.timeouts.page_load = 120
|
219
|
+
|
220
|
+
new_browser
|
209
221
|
end
|
210
222
|
end
|
211
223
|
end
|
data/lib/scrap_kit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrap_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.1.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gustavo Leon
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|