scrap_kit 0.1.11 → 0.1.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Gemfile.lock +5 -5
- data/lib/scrap_kit/recipe.rb +13 -3
- data/lib/scrap_kit/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3db092bd833a6c977d64468c38c6361922498683606f3ca543a3c7fdb5a91a1
|
4
|
+
data.tar.gz: 865ca828a23e0c2ce43f9070ce85e199979774964d10018881befd504925608f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e25dc4878540dd64d4d436a07d70f250ef4c6451f2586758324ad819d256af190c7e8f72a3b113722911649485cdf2a9793186e607fa3ff159ee992b424d6f5
|
7
|
+
data.tar.gz: 2de4cd58ddc9f46f1a9d198d95163900bf1394e87ed9733a444693a1775f0d59a72793212fd70d2a3656005d5739f730109e33375733e28146f2ee3bedeecaa2
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,30 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [0.1.16] 2020-11-20
|
4
|
+
|
5
|
+
### Changed/Added
|
6
|
+
- Return `src` value for `img` elements
|
7
|
+
|
8
|
+
## [0.1.15] 2020-10-25
|
9
|
+
|
10
|
+
### Changed/Added
|
11
|
+
- Add `--disable-dev-shm-usage` arguments to Chrome driver
|
12
|
+
|
13
|
+
## [0.1.14] 2020-10-10
|
14
|
+
|
15
|
+
### Changed/Added
|
16
|
+
- Increase timeout for Chrome driver
|
17
|
+
|
18
|
+
## [0.1.13] 2020-09-04
|
19
|
+
|
20
|
+
### Changed/Added
|
21
|
+
- Return nil if element doesn't exist for `extract_value_from_element`
|
22
|
+
|
23
|
+
## [0.1.12] 2020-09-03
|
24
|
+
|
25
|
+
### Changed/Added
|
26
|
+
- Return nil if `extract_attribute` fails
|
27
|
+
|
3
28
|
## [0.1.11] 2020-09-03
|
4
29
|
|
5
30
|
### Changed/Added
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
scrap_kit (0.1.11)
|
4
|
+
scrap_kit (0.1.16)
|
5
5
|
activesupport (~> 6.0)
|
6
6
|
watir (~> 6.16.5)
|
7
7
|
webdrivers (~> 4.0)
|
@@ -9,7 +9,7 @@ PATH
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
|
-
activesupport (6.0.3.
|
12
|
+
activesupport (6.0.3.4)
|
13
13
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
14
14
|
i18n (>= 0.7, < 2)
|
15
15
|
minitest (~> 5.1)
|
@@ -25,7 +25,7 @@ GEM
|
|
25
25
|
nokogiri (1.10.10)
|
26
26
|
mini_portile2 (~> 2.4.0)
|
27
27
|
rake (13.0.1)
|
28
|
-
regexp_parser (1.
|
28
|
+
regexp_parser (1.8.2)
|
29
29
|
rspec (3.9.0)
|
30
30
|
rspec-core (~> 3.9.0)
|
31
31
|
rspec-expectations (~> 3.9.0)
|
@@ -44,7 +44,7 @@ GEM
|
|
44
44
|
childprocess (>= 0.5, < 4.0)
|
45
45
|
rubyzip (>= 1.2.2)
|
46
46
|
thread_safe (0.3.6)
|
47
|
-
tzinfo (1.2.
|
47
|
+
tzinfo (1.2.8)
|
48
48
|
thread_safe (~> 0.1)
|
49
49
|
watir (6.16.5)
|
50
50
|
regexp_parser (~> 1.2)
|
@@ -53,7 +53,7 @@ GEM
|
|
53
53
|
nokogiri (~> 1.6)
|
54
54
|
rubyzip (>= 1.3.0)
|
55
55
|
selenium-webdriver (>= 3.0, < 4.0)
|
56
|
-
zeitwerk (2.4.
|
56
|
+
zeitwerk (2.4.1)
|
57
57
|
|
58
58
|
PLATFORMS
|
59
59
|
ruby
|
data/lib/scrap_kit/recipe.rb
CHANGED
@@ -4,7 +4,7 @@ require "watir"
|
|
4
4
|
|
5
5
|
module ScrapKit
|
6
6
|
class Recipe
|
7
|
-
attr_accessor :user_agent
|
7
|
+
attr_accessor :user_agent, :browser
|
8
8
|
|
9
9
|
class << self
|
10
10
|
def load(source)
|
@@ -91,9 +91,13 @@ module ScrapKit
|
|
91
91
|
end
|
92
92
|
|
93
93
|
def extract_value_from_element(element)
|
94
|
+
return nil unless element.exists?
|
95
|
+
|
94
96
|
if element&.respond_to?(:tag_name)
|
95
97
|
if element.tag_name.downcase == "input"
|
96
98
|
return element.attribute_value(:value)
|
99
|
+
elsif element.tag_name.downcase == "img"
|
100
|
+
return element.attribute_value(:src)
|
97
101
|
end
|
98
102
|
end
|
99
103
|
|
@@ -141,6 +145,8 @@ module ScrapKit
|
|
141
145
|
end
|
142
146
|
end
|
143
147
|
end
|
148
|
+
rescue
|
149
|
+
nil
|
144
150
|
end
|
145
151
|
|
146
152
|
private
|
@@ -191,7 +197,7 @@ module ScrapKit
|
|
191
197
|
end
|
192
198
|
end
|
193
199
|
|
194
|
-
sleep
|
200
|
+
sleep 1
|
195
201
|
@browser.wait_until do
|
196
202
|
@browser.ready_state == "complete"
|
197
203
|
end
|
@@ -207,10 +213,14 @@ module ScrapKit
|
|
207
213
|
|
208
214
|
if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
|
209
215
|
options.add_argument "--no-sandbox"
|
216
|
+
options.add_argument "--disable-dev-shm-usage"
|
210
217
|
options.binary = chrome_bin
|
211
218
|
end
|
212
219
|
|
213
|
-
Watir::Browser.new(:chrome, options: options)
|
220
|
+
new_browser = Watir::Browser.new(:chrome, options: options)
|
221
|
+
new_browser.driver.manage.timeouts.page_load = 120
|
222
|
+
|
223
|
+
new_browser
|
214
224
|
end
|
215
225
|
end
|
216
226
|
end
|
data/lib/scrap_kit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrap_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.11
|
4
|
+
version: 0.1.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gustavo Leon
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -142,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
142
|
- !ruby/object:Gem::Version
|
143
143
|
version: '0'
|
144
144
|
requirements: []
|
145
|
-
rubygems_version: 3.1.
|
145
|
+
rubygems_version: 3.1.4
|
146
146
|
signing_key:
|
147
147
|
specification_version: 4
|
148
148
|
summary: Scrap web sites using recipes.
|