scrap_kit 0.1.9 → 0.1.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Gemfile.lock +4 -4
- data/lib/scrap_kit/recipe.rb +14 -2
- data/lib/scrap_kit/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d387636088d4461f9f26bcc312f9f634d20fb24ce8d0e7eff9672dfbd87664ba
|
4
|
+
data.tar.gz: fe5e85497ba66a7370a4c9b4017058ad5f9d938ff2a4add96aba2888e6f6f6ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe464311773e564ee88ce8dab45478445a6291cc6a3c2151b2d45a819e8205fea168b15127fa61fab261042a30ddb8f605f3763629f58df528afa40c5853a324
|
7
|
+
data.tar.gz: 43d4c05f94cbff4daa91c22a0d0010dbd857abcd95ee73f1f501288733d33d3c5813b4b84c6c2e09b2a62a5efadd6808d5b7a2de7c012629899ac8b966af1a71
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,30 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [0.1.14] 2020-10-10
|
4
|
+
|
5
|
+
### Changed/Added
|
6
|
+
- Increase timeout for Chrome driver
|
7
|
+
|
8
|
+
## [0.1.13] 2020-09-04
|
9
|
+
|
10
|
+
### Changed/Added
|
11
|
+
- Return nil if element doesn't exist for `extract_value_from_element`
|
12
|
+
|
13
|
+
## [0.1.12] 2020-09-03
|
14
|
+
|
15
|
+
### Changed/Added
|
16
|
+
- Return nil if `extract_attribute` fails
|
17
|
+
|
18
|
+
## [0.1.11] 2020-09-03
|
19
|
+
|
20
|
+
### Changed/Added
|
21
|
+
- Add `user_agent` accessor for browser
|
22
|
+
|
23
|
+
## [0.1.10] 2020-09-03
|
24
|
+
|
25
|
+
### Changed/Added
|
26
|
+
- Map attributes to JavaScript calls
|
27
|
+
|
3
28
|
## [0.1.9] 2020-08-31
|
4
29
|
|
5
30
|
### Changed/Added
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
scrap_kit (0.1.9)
|
4
|
+
scrap_kit (0.1.14)
|
5
5
|
activesupport (~> 6.0)
|
6
6
|
watir (~> 6.16.5)
|
7
7
|
webdrivers (~> 4.0)
|
@@ -9,7 +9,7 @@ PATH
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
|
-
activesupport (6.0.3.
|
12
|
+
activesupport (6.0.3.4)
|
13
13
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
14
14
|
i18n (>= 0.7, < 2)
|
15
15
|
minitest (~> 5.1)
|
@@ -21,11 +21,11 @@ GEM
|
|
21
21
|
i18n (1.8.5)
|
22
22
|
concurrent-ruby (~> 1.0)
|
23
23
|
mini_portile2 (2.4.0)
|
24
|
-
minitest (5.14.
|
24
|
+
minitest (5.14.2)
|
25
25
|
nokogiri (1.10.10)
|
26
26
|
mini_portile2 (~> 2.4.0)
|
27
27
|
rake (13.0.1)
|
28
|
-
regexp_parser (1.
|
28
|
+
regexp_parser (1.8.1)
|
29
29
|
rspec (3.9.0)
|
30
30
|
rspec-core (~> 3.9.0)
|
31
31
|
rspec-expectations (~> 3.9.0)
|
data/lib/scrap_kit/recipe.rb
CHANGED
@@ -4,6 +4,8 @@ require "watir"
|
|
4
4
|
|
5
5
|
module ScrapKit
|
6
6
|
class Recipe
|
7
|
+
attr_accessor :user_agent, :browser
|
8
|
+
|
7
9
|
class << self
|
8
10
|
def load(source)
|
9
11
|
input = if source.is_a?(Hash)
|
@@ -89,6 +91,8 @@ module ScrapKit
|
|
89
91
|
end
|
90
92
|
|
91
93
|
def extract_value_from_element(element)
|
94
|
+
return nil unless element.exists?
|
95
|
+
|
92
96
|
if element&.respond_to?(:tag_name)
|
93
97
|
if element.tag_name.downcase == "input"
|
94
98
|
return element.attribute_value(:value)
|
@@ -125,6 +129,8 @@ module ScrapKit
|
|
125
129
|
|
126
130
|
output
|
127
131
|
end
|
132
|
+
elsif selector_or_object[:javascript]
|
133
|
+
@browser.execute_script(selector_or_object[:javascript])
|
128
134
|
else
|
129
135
|
found_elements = elements_from_selector(browser_or_element, selector_or_object)
|
130
136
|
|
@@ -137,6 +143,8 @@ module ScrapKit
|
|
137
143
|
end
|
138
144
|
end
|
139
145
|
end
|
146
|
+
rescue
|
147
|
+
nil
|
140
148
|
end
|
141
149
|
|
142
150
|
private
|
@@ -187,7 +195,7 @@ module ScrapKit
|
|
187
195
|
end
|
188
196
|
end
|
189
197
|
|
190
|
-
sleep
|
198
|
+
sleep 1
|
191
199
|
@browser.wait_until do
|
192
200
|
@browser.ready_state == "complete"
|
193
201
|
end
|
@@ -199,13 +207,17 @@ module ScrapKit
|
|
199
207
|
options.add_argument "--headless"
|
200
208
|
options.add_argument "--window-size=1080x720"
|
201
209
|
options.add_argument "--hide-scrollbars"
|
210
|
+
options.add_argument "--user-agent=#{@user_agent}" if @user_agent
|
202
211
|
|
203
212
|
if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
|
204
213
|
options.add_argument "--no-sandbox"
|
205
214
|
options.binary = chrome_bin
|
206
215
|
end
|
207
216
|
|
208
|
-
Watir::Browser.new(:chrome, options: options)
|
217
|
+
new_browser = Watir::Browser.new(:chrome, options: options)
|
218
|
+
new_browser.driver.manage.timeouts.page_load = 120
|
219
|
+
|
220
|
+
new_browser
|
209
221
|
end
|
210
222
|
end
|
211
223
|
end
|
data/lib/scrap_kit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrap_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.1.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gustavo Leon
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|