scrap_kit 0.1.10 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Gemfile.lock +3 -3
- data/lib/scrap_kit/recipe.rb +13 -2
- data/lib/scrap_kit/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 96b093d466ebf16f066524ee5ffe6d462fa8b6f5fb0cf4839e58ccb4ea0a64e7
|
4
|
+
data.tar.gz: b720216ab63263c2d28842285d2fd1a7aab6e0634e51098e9de9a2e8d471e1bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 354f88e8f875eca5a45ef557501005752e5ca8e1abbb4bd6933f7640ddf9f64a31f33d766eaa9e4b154da59cf6a53cb5a065795e597b3b213ea6c7610260da6d
|
7
|
+
data.tar.gz: cefdf23aa7b1c104992c125e40deb9637269f1c8f29e56c2e9bfbee98e9f59000a8a4c3f7c6a5c710a6e2dc3cc0a8208de5ab863bfaf59533a4095cf9721a4a3
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,30 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [0.1.15] 2020-10-25
|
4
|
+
|
5
|
+
### Changed/Added
|
6
|
+
- Add `--disable-dev-shm-usage` arguments to Chrome driver
|
7
|
+
|
8
|
+
## [0.1.14] 2020-10-10
|
9
|
+
|
10
|
+
### Changed/Added
|
11
|
+
- Increase timeout for Chrome driver
|
12
|
+
|
13
|
+
## [0.1.13] 2020-09-04
|
14
|
+
|
15
|
+
### Changed/Added
|
16
|
+
- Return nil if element doesn't exist for `extract_value_from_element`
|
17
|
+
|
18
|
+
## [0.1.12] 2020-09-03
|
19
|
+
|
20
|
+
### Changed/Added
|
21
|
+
- Return nil if `extract_attribute` fails
|
22
|
+
|
23
|
+
## [0.1.11] 2020-09-03
|
24
|
+
|
25
|
+
### Changed/Added
|
26
|
+
- Add `user_agent` accessor for browser
|
27
|
+
|
3
28
|
## [0.1.10] 2020-09-03
|
4
29
|
|
5
30
|
### Changed/Added
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
scrap_kit (0.1.10)
|
4
|
+
scrap_kit (0.1.15)
|
5
5
|
activesupport (~> 6.0)
|
6
6
|
watir (~> 6.16.5)
|
7
7
|
webdrivers (~> 4.0)
|
@@ -9,7 +9,7 @@ PATH
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
|
-
activesupport (6.0.3.
|
12
|
+
activesupport (6.0.3.4)
|
13
13
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
14
14
|
i18n (>= 0.7, < 2)
|
15
15
|
minitest (~> 5.1)
|
@@ -25,7 +25,7 @@ GEM
|
|
25
25
|
nokogiri (1.10.10)
|
26
26
|
mini_portile2 (~> 2.4.0)
|
27
27
|
rake (13.0.1)
|
28
|
-
regexp_parser (1.
|
28
|
+
regexp_parser (1.8.2)
|
29
29
|
rspec (3.9.0)
|
30
30
|
rspec-core (~> 3.9.0)
|
31
31
|
rspec-expectations (~> 3.9.0)
|
data/lib/scrap_kit/recipe.rb
CHANGED
@@ -4,6 +4,8 @@ require "watir"
|
|
4
4
|
|
5
5
|
module ScrapKit
|
6
6
|
class Recipe
|
7
|
+
attr_accessor :user_agent, :browser
|
8
|
+
|
7
9
|
class << self
|
8
10
|
def load(source)
|
9
11
|
input = if source.is_a?(Hash)
|
@@ -89,6 +91,8 @@ module ScrapKit
|
|
89
91
|
end
|
90
92
|
|
91
93
|
def extract_value_from_element(element)
|
94
|
+
return nil unless element.exists?
|
95
|
+
|
92
96
|
if element&.respond_to?(:tag_name)
|
93
97
|
if element.tag_name.downcase == "input"
|
94
98
|
return element.attribute_value(:value)
|
@@ -139,6 +143,8 @@ module ScrapKit
|
|
139
143
|
end
|
140
144
|
end
|
141
145
|
end
|
146
|
+
rescue
|
147
|
+
nil
|
142
148
|
end
|
143
149
|
|
144
150
|
private
|
@@ -189,7 +195,7 @@ module ScrapKit
|
|
189
195
|
end
|
190
196
|
end
|
191
197
|
|
192
|
-
sleep
|
198
|
+
sleep 1
|
193
199
|
@browser.wait_until do
|
194
200
|
@browser.ready_state == "complete"
|
195
201
|
end
|
@@ -201,13 +207,18 @@ module ScrapKit
|
|
201
207
|
options.add_argument "--headless"
|
202
208
|
options.add_argument "--window-size=1080x720"
|
203
209
|
options.add_argument "--hide-scrollbars"
|
210
|
+
options.add_argument "--user-agent=#{@user_agent}" if @user_agent
|
204
211
|
|
205
212
|
if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
|
206
213
|
options.add_argument "--no-sandbox"
|
214
|
+
options.add_argument "--disable-dev-shm-usage"
|
207
215
|
options.binary = chrome_bin
|
208
216
|
end
|
209
217
|
|
210
|
-
Watir::Browser.new(:chrome, options: options)
|
218
|
+
new_browser = Watir::Browser.new(:chrome, options: options)
|
219
|
+
new_browser.driver.manage.timeouts.page_load = 120
|
220
|
+
|
221
|
+
new_browser
|
211
222
|
end
|
212
223
|
end
|
213
224
|
end
|
data/lib/scrap_kit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrap_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.10
|
4
|
+
version: 0.1.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gustavo Leon
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -142,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
142
|
- !ruby/object:Gem::Version
|
143
143
|
version: '0'
|
144
144
|
requirements: []
|
145
|
-
rubygems_version: 3.1.
|
145
|
+
rubygems_version: 3.1.4
|
146
146
|
signing_key:
|
147
147
|
specification_version: 4
|
148
148
|
summary: Scrap web sites using recipes.
|