scrap_kit 0.1.11 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Gemfile.lock +5 -5
- data/lib/scrap_kit/recipe.rb +13 -3
- data/lib/scrap_kit/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3db092bd833a6c977d64468c38c6361922498683606f3ca543a3c7fdb5a91a1
|
4
|
+
data.tar.gz: 865ca828a23e0c2ce43f9070ce85e199979774964d10018881befd504925608f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e25dc4878540dd64d4d436a07d70f250ef4c6451f2586758324ad819d256af190c7e8f72a3b113722911649485cdf2a9793186e607fa3ff159ee992b424d6f5
|
7
|
+
data.tar.gz: 2de4cd58ddc9f46f1a9d198d95163900bf1394e87ed9733a444693a1775f0d59a72793212fd70d2a3656005d5739f730109e33375733e28146f2ee3bedeecaa2
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,30 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [0.1.16] 2020-11-20
|
4
|
+
|
5
|
+
### Changed/Added
|
6
|
+
- Return `src` value for `img` elements
|
7
|
+
|
8
|
+
## [0.1.15] 2020-10-25
|
9
|
+
|
10
|
+
### Changed/Added
|
11
|
+
- Add `--disable-dev-shm-usage` arguments to Chrome driver
|
12
|
+
|
13
|
+
## [0.1.14] 2020-10-10
|
14
|
+
|
15
|
+
### Changed/Added
|
16
|
+
- Increase timeout for Chrome driver
|
17
|
+
|
18
|
+
## [0.1.13] 2020-09-04
|
19
|
+
|
20
|
+
### Changed/Added
|
21
|
+
- Return nil if element doesn't exist for `extract_value_from_element`
|
22
|
+
|
23
|
+
## [0.1.12] 2020-09-03
|
24
|
+
|
25
|
+
### Changed/Added
|
26
|
+
- Return nil if `extract_attribute` fails
|
27
|
+
|
3
28
|
## [0.1.11] 2020-09-03
|
4
29
|
|
5
30
|
### Changed/Added
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
scrap_kit (0.1.11)
|
4
|
+
scrap_kit (0.1.16)
|
5
5
|
activesupport (~> 6.0)
|
6
6
|
watir (~> 6.16.5)
|
7
7
|
webdrivers (~> 4.0)
|
@@ -9,7 +9,7 @@ PATH
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
|
-
activesupport (6.0.3.
|
12
|
+
activesupport (6.0.3.4)
|
13
13
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
14
14
|
i18n (>= 0.7, < 2)
|
15
15
|
minitest (~> 5.1)
|
@@ -25,7 +25,7 @@ GEM
|
|
25
25
|
nokogiri (1.10.10)
|
26
26
|
mini_portile2 (~> 2.4.0)
|
27
27
|
rake (13.0.1)
|
28
|
-
regexp_parser (1.
|
28
|
+
regexp_parser (1.8.2)
|
29
29
|
rspec (3.9.0)
|
30
30
|
rspec-core (~> 3.9.0)
|
31
31
|
rspec-expectations (~> 3.9.0)
|
@@ -44,7 +44,7 @@ GEM
|
|
44
44
|
childprocess (>= 0.5, < 4.0)
|
45
45
|
rubyzip (>= 1.2.2)
|
46
46
|
thread_safe (0.3.6)
|
47
|
-
tzinfo (1.2.
|
47
|
+
tzinfo (1.2.8)
|
48
48
|
thread_safe (~> 0.1)
|
49
49
|
watir (6.16.5)
|
50
50
|
regexp_parser (~> 1.2)
|
@@ -53,7 +53,7 @@ GEM
|
|
53
53
|
nokogiri (~> 1.6)
|
54
54
|
rubyzip (>= 1.3.0)
|
55
55
|
selenium-webdriver (>= 3.0, < 4.0)
|
56
|
-
zeitwerk (2.4.
|
56
|
+
zeitwerk (2.4.1)
|
57
57
|
|
58
58
|
PLATFORMS
|
59
59
|
ruby
|
data/lib/scrap_kit/recipe.rb
CHANGED
@@ -4,7 +4,7 @@ require "watir"
|
|
4
4
|
|
5
5
|
module ScrapKit
|
6
6
|
class Recipe
|
7
|
-
attr_accessor :user_agent
|
7
|
+
attr_accessor :user_agent, :browser
|
8
8
|
|
9
9
|
class << self
|
10
10
|
def load(source)
|
@@ -91,9 +91,13 @@ module ScrapKit
|
|
91
91
|
end
|
92
92
|
|
93
93
|
def extract_value_from_element(element)
|
94
|
+
return nil unless element.exists?
|
95
|
+
|
94
96
|
if element&.respond_to?(:tag_name)
|
95
97
|
if element.tag_name.downcase == "input"
|
96
98
|
return element.attribute_value(:value)
|
99
|
+
elsif element.tag_name.downcase == "img"
|
100
|
+
return element.attribute_value(:src)
|
97
101
|
end
|
98
102
|
end
|
99
103
|
|
@@ -141,6 +145,8 @@ module ScrapKit
|
|
141
145
|
end
|
142
146
|
end
|
143
147
|
end
|
148
|
+
rescue
|
149
|
+
nil
|
144
150
|
end
|
145
151
|
|
146
152
|
private
|
@@ -191,7 +197,7 @@ module ScrapKit
|
|
191
197
|
end
|
192
198
|
end
|
193
199
|
|
194
|
-
sleep
|
200
|
+
sleep 1
|
195
201
|
@browser.wait_until do
|
196
202
|
@browser.ready_state == "complete"
|
197
203
|
end
|
@@ -207,10 +213,14 @@ module ScrapKit
|
|
207
213
|
|
208
214
|
if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
|
209
215
|
options.add_argument "--no-sandbox"
|
216
|
+
options.add_argument "--disable-dev-shm-usage"
|
210
217
|
options.binary = chrome_bin
|
211
218
|
end
|
212
219
|
|
213
|
-
Watir::Browser.new(:chrome, options: options)
|
220
|
+
new_browser = Watir::Browser.new(:chrome, options: options)
|
221
|
+
new_browser.driver.manage.timeouts.page_load = 120
|
222
|
+
|
223
|
+
new_browser
|
214
224
|
end
|
215
225
|
end
|
216
226
|
end
|
data/lib/scrap_kit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrap_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.11
|
4
|
+
version: 0.1.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gustavo Leon
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -142,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
142
|
- !ruby/object:Gem::Version
|
143
143
|
version: '0'
|
144
144
|
requirements: []
|
145
|
-
rubygems_version: 3.1.
|
145
|
+
rubygems_version: 3.1.4
|
146
146
|
signing_key:
|
147
147
|
specification_version: 4
|
148
148
|
summary: Scrap web sites using recipes.
|