scrap_kit 0.1.0 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/publish_gem.yml +2 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +26 -0
- data/Gemfile.lock +23 -15
- data/lib/scrap_kit/recipe.rb +33 -2
- data/lib/scrap_kit/version.rb +1 -1
- data/scrap_kit.gemspec +4 -3
- metadata +29 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0800dc31098f3961b39783d64ee7ddfbecc67a3eba2efaa1fdda7c384bf0b88b'
|
4
|
+
data.tar.gz: bd6e32addeef4d04545d092aeef0b45e9e60dfedf1dc9643ae70e8e50512a589
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c8bee9dc8ed755c2edd4b5c6968ce0e9aae4f4a662963838e9493e9ed125b59ca45c95e18af9e9724730d37ada2c39b8b9ebeef95dc1bb69cb3161813770343a
|
7
|
+
data.tar.gz: a6a86b4ea05fe89541db1dcd82dd768e5b31c7185a7659901324dff13c517e82d4faf3c414165b84b58d19f0e3316ca87493dfe66a6325cc8e43aabaa16ce38e
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,31 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [0.1.5] 2020-08-08
|
4
|
+
|
5
|
+
### Changed/Added
|
6
|
+
- Add `webdrivers` as dependency
|
7
|
+
|
8
|
+
## [0.1.4] 2020-07-12
|
9
|
+
|
10
|
+
### Changed/Added
|
11
|
+
- Add support for `<input />` elements
|
12
|
+
|
13
|
+
## [0.1.3] 2020-06-18
|
14
|
+
|
15
|
+
### Changed/Added
|
16
|
+
- Moved development dependencies as dependencies
|
17
|
+
|
18
|
+
## [0.1.2] 2020-06-18
|
19
|
+
|
20
|
+
### Changed/Added
|
21
|
+
- Add support for selector arrays to filter elements based on conditions
|
22
|
+
|
23
|
+
## [0.1.1] 2020-05-18
|
24
|
+
|
25
|
+
### Changed/Added
|
26
|
+
- Update activesupport requirement from = 6.0.2.1 to = 6.0.3.1
|
27
|
+
- Update rake requirement from ~> 10.0 to ~> 13.0
|
28
|
+
|
3
29
|
## [0.1.0] 2020-02-09
|
4
30
|
|
5
31
|
### Changed/Added
|
data/Gemfile.lock
CHANGED
@@ -1,25 +1,31 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
scrap_kit (0.1.0)
|
4
|
+
scrap_kit (0.1.5)
|
5
|
+
activesupport (= 6.0.3.1)
|
6
|
+
watir (~> 6.16.5)
|
7
|
+
webdrivers (~> 4.0)
|
5
8
|
|
6
9
|
GEM
|
7
10
|
remote: https://rubygems.org/
|
8
11
|
specs:
|
9
|
-
activesupport (6.0.2.1)
|
12
|
+
activesupport (6.0.3.1)
|
10
13
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
11
14
|
i18n (>= 0.7, < 2)
|
12
15
|
minitest (~> 5.1)
|
13
16
|
tzinfo (~> 1.1)
|
14
|
-
zeitwerk (~> 2.2)
|
17
|
+
zeitwerk (~> 2.2, >= 2.2.2)
|
15
18
|
childprocess (3.0.0)
|
16
|
-
concurrent-ruby (1.1.
|
19
|
+
concurrent-ruby (1.1.6)
|
17
20
|
diff-lcs (1.3)
|
18
|
-
i18n (1.8.
|
21
|
+
i18n (1.8.5)
|
19
22
|
concurrent-ruby (~> 1.0)
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
+
mini_portile2 (2.4.0)
|
24
|
+
minitest (5.14.1)
|
25
|
+
nokogiri (1.10.10)
|
26
|
+
mini_portile2 (~> 2.4.0)
|
27
|
+
rake (13.0.1)
|
28
|
+
regexp_parser (1.7.1)
|
23
29
|
rspec (3.9.0)
|
24
30
|
rspec-core (~> 3.9.0)
|
25
31
|
rspec-expectations (~> 3.9.0)
|
@@ -33,28 +39,30 @@ GEM
|
|
33
39
|
diff-lcs (>= 1.2.0, < 2.0)
|
34
40
|
rspec-support (~> 3.9.0)
|
35
41
|
rspec-support (3.9.2)
|
36
|
-
rubyzip (2.
|
42
|
+
rubyzip (2.3.0)
|
37
43
|
selenium-webdriver (3.142.7)
|
38
44
|
childprocess (>= 0.5, < 4.0)
|
39
45
|
rubyzip (>= 1.2.2)
|
40
46
|
thread_safe (0.3.6)
|
41
|
-
tzinfo (1.2.
|
47
|
+
tzinfo (1.2.7)
|
42
48
|
thread_safe (~> 0.1)
|
43
49
|
watir (6.16.5)
|
44
50
|
regexp_parser (~> 1.2)
|
45
51
|
selenium-webdriver (~> 3.6)
|
46
|
-
|
52
|
+
webdrivers (4.4.1)
|
53
|
+
nokogiri (~> 1.6)
|
54
|
+
rubyzip (>= 1.3.0)
|
55
|
+
selenium-webdriver (>= 3.0, < 4.0)
|
56
|
+
zeitwerk (2.4.0)
|
47
57
|
|
48
58
|
PLATFORMS
|
49
59
|
ruby
|
50
60
|
|
51
61
|
DEPENDENCIES
|
52
|
-
activesupport (= 6.0.2.1)
|
53
62
|
bundler (~> 2.0)
|
54
|
-
rake (~> 10.0)
|
63
|
+
rake (~> 13.0)
|
55
64
|
rspec (~> 3.0)
|
56
65
|
scrap_kit!
|
57
|
-
watir (~> 6.16.5)
|
58
66
|
|
59
67
|
BUNDLED WITH
|
60
|
-
2.
|
68
|
+
2.1.4
|
data/lib/scrap_kit/recipe.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "active_support/core_ext/hash"
|
2
|
+
require "webdrivers/chromedriver"
|
2
3
|
require "watir"
|
3
4
|
|
4
5
|
module ScrapKit
|
@@ -32,14 +33,44 @@ module ScrapKit
|
|
32
33
|
def run_step(browser, step)
|
33
34
|
end
|
34
35
|
|
36
|
+
def elements_from_selector(browser_or_element, selector)
|
37
|
+
if selector.is_a?(String)
|
38
|
+
browser_or_element.elements(css: selector)
|
39
|
+
elsif selector.is_a?(Array)
|
40
|
+
*remainder, condition = selector
|
41
|
+
elements = browser_or_element
|
42
|
+
|
43
|
+
remainder.each do |item|
|
44
|
+
elements = elements.elements(css: item)
|
45
|
+
end
|
46
|
+
|
47
|
+
elements.filter do |element|
|
48
|
+
condition_key = condition.keys[0].to_s
|
49
|
+
condition_value = condition.values[0]
|
50
|
+
found_element = element.element(css: condition_key)
|
51
|
+
extract_value_from_element(found_element)&.match(condition_value)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def extract_value_from_element(element)
|
57
|
+
if element&.respond_to?(:tag_name)
|
58
|
+
if element.tag_name.downcase == "input"
|
59
|
+
return element.attribute_value(:value)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
element&.text_content
|
64
|
+
end
|
65
|
+
|
35
66
|
def extract_attribute(browser_or_element, selector_or_hash)
|
36
67
|
if selector_or_hash.is_a?(String)
|
37
|
-
browser_or_element.element(css: selector_or_hash)
|
68
|
+
extract_value_from_element(browser_or_element.element(css: selector_or_hash))
|
38
69
|
elsif selector_or_hash.is_a?(Hash)
|
39
70
|
selector = selector_or_hash[:selector]
|
40
71
|
selector_for_children_attributes = selector_or_hash[:children_attributes]
|
41
72
|
|
42
|
-
browser_or_element
|
73
|
+
elements_from_selector(browser_or_element, selector).map do |element|
|
43
74
|
output = {}
|
44
75
|
|
45
76
|
selector_for_children_attributes.each do |child_attribute_name, child_selector|
|
data/lib/scrap_kit/version.rb
CHANGED
data/scrap_kit.gemspec
CHANGED
@@ -29,8 +29,9 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.require_paths = ["lib"]
|
30
30
|
|
31
31
|
spec.add_development_dependency "bundler", "~> 2.0"
|
32
|
-
spec.add_development_dependency "rake", "~> 10.0"
|
32
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
33
33
|
spec.add_development_dependency "rspec", "~> 3.0"
|
34
|
-
spec.
|
35
|
-
spec.
|
34
|
+
spec.add_dependency "watir", "~> 6.16.5"
|
35
|
+
spec.add_dependency "webdrivers", "~> 4.0"
|
36
|
+
spec.add_dependency "activesupport", "6.0.3.1"
|
36
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrap_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gustavo Leon
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-02-09 00:00:00.000000000 Z
|
11
|
+
date: 2020-08-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '10.0'
|
33
|
+
version: '13.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '10.0'
|
40
|
+
version: '13.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -53,33 +53,47 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '3.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name: activesupport
|
56
|
+
name: watir
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - '='
|
59
|
+
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 6.0.2.1
|
62
|
-
type: :development
|
61
|
+
version: 6.16.5
|
62
|
+
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - '='
|
66
|
+
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 6.0.2.1
|
68
|
+
version: 6.16.5
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name: watir
|
70
|
+
name: webdrivers
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 6.16.5
|
76
|
-
type: :development
|
75
|
+
version: '4.0'
|
76
|
+
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 6.16.5
|
82
|
+
version: '4.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activesupport
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 6.0.3.1
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 6.0.3.1
|
83
97
|
description: Run JSON-based recipes to scrap web sites.
|
84
98
|
email:
|
85
99
|
- hpneo@hotmail.com
|
@@ -128,7 +142,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
128
142
|
- !ruby/object:Gem::Version
|
129
143
|
version: '0'
|
130
144
|
requirements: []
|
131
|
-
rubygems_version: 3.
|
145
|
+
rubygems_version: 3.1.2
|
132
146
|
signing_key:
|
133
147
|
specification_version: 4
|
134
148
|
summary: Scrap web sites using recipes.
|