scrap_kit 0.1.8 → 0.1.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Gemfile.lock +2 -2
- data/lib/scrap_kit/recipe.rb +26 -2
- data/lib/scrap_kit/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3c0ad11b6a5a21a15c49f4203e2256ebac9d83fa9ed8de4060edd7b49ed6513
|
4
|
+
data.tar.gz: 4a9aa63a96b9bfda89f578827a3f7df5f03458d9a8191911d5e2ffa723ac8913
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c21f2fc8782ca5552f92020cbf766f982cbbc1621256a0885340507e74b73d895086343111f65b3a7c6aba23771e345b8e57f962bcb3d1ab77a56392006469e4
|
7
|
+
data.tar.gz: bece24176f75eb7113fd5a8b0d1aaf151fa4d61ddd3bd697e9b4e53d2390888ef38fee6f9761ac3c02b8671710b8816b44a8236ef9f02cf547a95eff28796f88
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,30 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [0.1.13] 2020-09-04
|
4
|
+
|
5
|
+
### Changed/Added
|
6
|
+
- Return nil if element doesn't exist for `extract_value_from_element`
|
7
|
+
|
8
|
+
## [0.1.12] 2020-09-03
|
9
|
+
|
10
|
+
### Changed/Added
|
11
|
+
- Return nil if `extract_attribute` fails
|
12
|
+
|
13
|
+
## [0.1.11] 2020-09-03
|
14
|
+
|
15
|
+
### Changed/Added
|
16
|
+
- Add `user_agent` accessor for browser
|
17
|
+
|
18
|
+
## [0.1.10] 2020-09-03
|
19
|
+
|
20
|
+
### Changed/Added
|
21
|
+
- Map attributes to JavaScript calls
|
22
|
+
|
23
|
+
## [0.1.9] 2020-08-31
|
24
|
+
|
25
|
+
### Changed/Added
|
26
|
+
- Set arguments for Chrome driver
|
27
|
+
|
3
28
|
## [0.1.8] 2020-08-29
|
4
29
|
|
5
30
|
### Changed/Added
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
scrap_kit (0.1.8)
|
4
|
+
scrap_kit (0.1.13)
|
5
5
|
activesupport (~> 6.0)
|
6
6
|
watir (~> 6.16.5)
|
7
7
|
webdrivers (~> 4.0)
|
@@ -21,7 +21,7 @@ GEM
|
|
21
21
|
i18n (1.8.5)
|
22
22
|
concurrent-ruby (~> 1.0)
|
23
23
|
mini_portile2 (2.4.0)
|
24
|
-
minitest (5.14.
|
24
|
+
minitest (5.14.2)
|
25
25
|
nokogiri (1.10.10)
|
26
26
|
mini_portile2 (~> 2.4.0)
|
27
27
|
rake (13.0.1)
|
data/lib/scrap_kit/recipe.rb
CHANGED
@@ -4,6 +4,8 @@ require "watir"
|
|
4
4
|
|
5
5
|
module ScrapKit
|
6
6
|
class Recipe
|
7
|
+
attr_accessor :user_agent
|
8
|
+
|
7
9
|
class << self
|
8
10
|
def load(source)
|
9
11
|
input = if source.is_a?(Hash)
|
@@ -27,7 +29,7 @@ module ScrapKit
|
|
27
29
|
def run
|
28
30
|
output = {}
|
29
31
|
|
30
|
-
@browser =
|
32
|
+
@browser = create_browser
|
31
33
|
@browser.goto @url
|
32
34
|
|
33
35
|
@steps.each do |step|
|
@@ -89,6 +91,8 @@ module ScrapKit
|
|
89
91
|
end
|
90
92
|
|
91
93
|
def extract_value_from_element(element)
|
94
|
+
return nil unless element.exists?
|
95
|
+
|
92
96
|
if element&.respond_to?(:tag_name)
|
93
97
|
if element.tag_name.downcase == "input"
|
94
98
|
return element.attribute_value(:value)
|
@@ -125,6 +129,8 @@ module ScrapKit
|
|
125
129
|
|
126
130
|
output
|
127
131
|
end
|
132
|
+
elsif selector_or_object[:javascript]
|
133
|
+
@browser.execute_script(selector_or_object[:javascript])
|
128
134
|
else
|
129
135
|
found_elements = elements_from_selector(browser_or_element, selector_or_object)
|
130
136
|
|
@@ -137,6 +143,8 @@ module ScrapKit
|
|
137
143
|
end
|
138
144
|
end
|
139
145
|
end
|
146
|
+
rescue
|
147
|
+
nil
|
140
148
|
end
|
141
149
|
|
142
150
|
private
|
@@ -187,10 +195,26 @@ module ScrapKit
|
|
187
195
|
end
|
188
196
|
end
|
189
197
|
|
190
|
-
sleep
|
198
|
+
sleep 1
|
191
199
|
@browser.wait_until do
|
192
200
|
@browser.ready_state == "complete"
|
193
201
|
end
|
194
202
|
end
|
203
|
+
|
204
|
+
def create_browser
|
205
|
+
options = Selenium::WebDriver::Chrome::Options.new
|
206
|
+
|
207
|
+
options.add_argument "--headless"
|
208
|
+
options.add_argument "--window-size=1080x720"
|
209
|
+
options.add_argument "--hide-scrollbars"
|
210
|
+
options.add_argument "--user-agent=#{@user_agent}" if @user_agent
|
211
|
+
|
212
|
+
if chrome_bin = ENV["GOOGLE_CHROME_SHIM"]
|
213
|
+
options.add_argument "--no-sandbox"
|
214
|
+
options.binary = chrome_bin
|
215
|
+
end
|
216
|
+
|
217
|
+
Watir::Browser.new(:chrome, options: options)
|
218
|
+
end
|
195
219
|
end
|
196
220
|
end
|
data/lib/scrap_kit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrap_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gustavo Leon
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|