scrap_kit 0.1.1 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/run_tests.yml +20 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +30 -0
- data/Gemfile.lock +21 -13
- data/lib/scrap_kit/recipe.rb +33 -2
- data/lib/scrap_kit/version.rb +1 -1
- data/scrap_kit.gemspec +3 -2
- metadata +27 -13
- data/.github/workflows/publish_gem.yml +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ffb35087e7e374b2e5fa15d2f9c2d41374d0242e4f2b5a986db195929a530dc6
|
4
|
+
data.tar.gz: 904246fc2061a3ec1686a70d6755fd81f13b215b3dce7816177f0774af666b89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c55754d5f772f7bfd9c9bd4fdda802f77a898f6c86f0956d9f94d9c81e7151fdaed07ec45f56d0b7010eb56dc7a1a94e58079bcb1da4753586766eefd4bd4954
|
7
|
+
data.tar.gz: b20fc3e3b8852361e707d4597ec510d44cb3d76a9260f6d719e2547adf5a68abafeb7423f02c47ba853a4f92ee43df3ccfc88bb60dd21913249ef8b62613a53c
|
@@ -0,0 +1,20 @@
|
|
1
|
+
name: Run tests
|
2
|
+
|
3
|
+
on: [push]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
test:
|
7
|
+
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
|
10
|
+
steps:
|
11
|
+
- uses: actions/checkout@v1
|
12
|
+
- name: Setup Ruby
|
13
|
+
uses: actions/setup-ruby@v1
|
14
|
+
with:
|
15
|
+
ruby-version: '2.7.1'
|
16
|
+
- name: Build and run tests
|
17
|
+
run: |
|
18
|
+
gem install bundler
|
19
|
+
bundle update --conservative --jobs 4 --retry 3
|
20
|
+
bundle exec rake spec
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,35 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [0.1.6] 2020-08-28
|
4
|
+
- Update activesupport requirement from = 6.0.2.1 to ~> 6.0
|
5
|
+
- Fix bug when matching selector condition
|
6
|
+
|
7
|
+
## [0.1.5] 2020-08-08
|
8
|
+
|
9
|
+
### Changed/Added
|
10
|
+
- Add `webdrivers` as dependency
|
11
|
+
|
12
|
+
## [0.1.4] 2020-07-12
|
13
|
+
|
14
|
+
### Changed/Added
|
15
|
+
- Add support for `<input />` elements
|
16
|
+
|
17
|
+
## [0.1.3] 2020-06-18
|
18
|
+
|
19
|
+
### Changed/Added
|
20
|
+
- Moved development dependencies as dependencies
|
21
|
+
|
22
|
+
## [0.1.2] 2020-06-18
|
23
|
+
|
24
|
+
### Changed/Added
|
25
|
+
- Add support for selector arrays to filter elements based on conditions
|
26
|
+
|
27
|
+
## [0.1.1] 2020-05-18
|
28
|
+
|
29
|
+
### Changed/Added
|
30
|
+
- Update activesupport requirement from = 6.0.2.1 to = 6.0.3.1
|
31
|
+
- Update rake requirement from ~> 10.0 to ~> 13.0
|
32
|
+
|
3
33
|
## [0.1.0] 2020-02-09
|
4
34
|
|
5
35
|
### Changed/Added
|
data/Gemfile.lock
CHANGED
@@ -1,25 +1,31 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
scrap_kit (0.1.1)
|
4
|
+
scrap_kit (0.1.6)
|
5
|
+
activesupport (~> 6.0)
|
6
|
+
watir (~> 6.16.5)
|
7
|
+
webdrivers (~> 4.0)
|
5
8
|
|
6
9
|
GEM
|
7
10
|
remote: https://rubygems.org/
|
8
11
|
specs:
|
9
|
-
activesupport (6.0.2.1)
|
12
|
+
activesupport (6.0.3.2)
|
10
13
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
11
14
|
i18n (>= 0.7, < 2)
|
12
15
|
minitest (~> 5.1)
|
13
16
|
tzinfo (~> 1.1)
|
14
|
-
zeitwerk (~> 2.2)
|
17
|
+
zeitwerk (~> 2.2, >= 2.2.2)
|
15
18
|
childprocess (3.0.0)
|
16
|
-
concurrent-ruby (1.1.
|
19
|
+
concurrent-ruby (1.1.7)
|
17
20
|
diff-lcs (1.3)
|
18
|
-
i18n (1.8.
|
21
|
+
i18n (1.8.5)
|
19
22
|
concurrent-ruby (~> 1.0)
|
20
|
-
|
23
|
+
mini_portile2 (2.4.0)
|
24
|
+
minitest (5.14.1)
|
25
|
+
nokogiri (1.10.10)
|
26
|
+
mini_portile2 (~> 2.4.0)
|
21
27
|
rake (13.0.1)
|
22
|
-
regexp_parser (1.
|
28
|
+
regexp_parser (1.7.1)
|
23
29
|
rspec (3.9.0)
|
24
30
|
rspec-core (~> 3.9.0)
|
25
31
|
rspec-expectations (~> 3.9.0)
|
@@ -33,28 +39,30 @@ GEM
|
|
33
39
|
diff-lcs (>= 1.2.0, < 2.0)
|
34
40
|
rspec-support (~> 3.9.0)
|
35
41
|
rspec-support (3.9.2)
|
36
|
-
rubyzip (2.
|
42
|
+
rubyzip (2.3.0)
|
37
43
|
selenium-webdriver (3.142.7)
|
38
44
|
childprocess (>= 0.5, < 4.0)
|
39
45
|
rubyzip (>= 1.2.2)
|
40
46
|
thread_safe (0.3.6)
|
41
|
-
tzinfo (1.2.
|
47
|
+
tzinfo (1.2.7)
|
42
48
|
thread_safe (~> 0.1)
|
43
49
|
watir (6.16.5)
|
44
50
|
regexp_parser (~> 1.2)
|
45
51
|
selenium-webdriver (~> 3.6)
|
46
|
-
|
52
|
+
webdrivers (4.4.1)
|
53
|
+
nokogiri (~> 1.6)
|
54
|
+
rubyzip (>= 1.3.0)
|
55
|
+
selenium-webdriver (>= 3.0, < 4.0)
|
56
|
+
zeitwerk (2.4.0)
|
47
57
|
|
48
58
|
PLATFORMS
|
49
59
|
ruby
|
50
60
|
|
51
61
|
DEPENDENCIES
|
52
|
-
activesupport (= 6.0.2.1)
|
53
62
|
bundler (~> 2.0)
|
54
63
|
rake (~> 13.0)
|
55
64
|
rspec (~> 3.0)
|
56
65
|
scrap_kit!
|
57
|
-
watir (~> 6.16.5)
|
58
66
|
|
59
67
|
BUNDLED WITH
|
60
|
-
2.
|
68
|
+
2.1.4
|
data/lib/scrap_kit/recipe.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "active_support/core_ext/hash"
|
2
|
+
require "webdrivers/chromedriver"
|
2
3
|
require "watir"
|
3
4
|
|
4
5
|
module ScrapKit
|
@@ -32,14 +33,44 @@ module ScrapKit
|
|
32
33
|
def run_step(browser, step)
|
33
34
|
end
|
34
35
|
|
36
|
+
def elements_from_selector(browser_or_element, selector)
|
37
|
+
if selector.is_a?(String)
|
38
|
+
browser_or_element.elements(css: selector)
|
39
|
+
elsif selector.is_a?(Array)
|
40
|
+
*remainder, condition = selector
|
41
|
+
elements = browser_or_element
|
42
|
+
|
43
|
+
remainder.each do |item|
|
44
|
+
elements = elements.elements(css: item)
|
45
|
+
end
|
46
|
+
|
47
|
+
elements.filter do |element|
|
48
|
+
condition_key = condition.keys.first.to_s
|
49
|
+
condition_value = condition.values.first
|
50
|
+
found_element = element.element(css: condition_key)
|
51
|
+
extract_value_from_element(found_element) == condition_value
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def extract_value_from_element(element)
|
57
|
+
if element&.respond_to?(:tag_name)
|
58
|
+
if element.tag_name.downcase == "input"
|
59
|
+
return element.attribute_value(:value)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
element&.text_content
|
64
|
+
end
|
65
|
+
|
35
66
|
def extract_attribute(browser_or_element, selector_or_hash)
|
36
67
|
if selector_or_hash.is_a?(String)
|
37
|
-
browser_or_element.element(css: selector_or_hash)
|
68
|
+
extract_value_from_element(browser_or_element.element(css: selector_or_hash))
|
38
69
|
elsif selector_or_hash.is_a?(Hash)
|
39
70
|
selector = selector_or_hash[:selector]
|
40
71
|
selector_for_children_attributes = selector_or_hash[:children_attributes]
|
41
72
|
|
42
|
-
browser_or_element
|
73
|
+
elements_from_selector(browser_or_element, selector).map do |element|
|
43
74
|
output = {}
|
44
75
|
|
45
76
|
selector_for_children_attributes.each do |child_attribute_name, child_selector|
|
data/lib/scrap_kit/version.rb
CHANGED
data/scrap_kit.gemspec
CHANGED
@@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.add_development_dependency "bundler", "~> 2.0"
|
32
32
|
spec.add_development_dependency "rake", "~> 13.0"
|
33
33
|
spec.add_development_dependency "rspec", "~> 3.0"
|
34
|
-
spec.
|
35
|
-
spec.
|
34
|
+
spec.add_dependency "watir", "~> 6.16.5"
|
35
|
+
spec.add_dependency "webdrivers", "~> 4.0"
|
36
|
+
spec.add_dependency "activesupport", "~> 6.0"
|
36
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrap_kit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gustavo Leon
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-08-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -53,33 +53,47 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '3.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: watir
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 6.
|
62
|
-
type: :
|
61
|
+
version: 6.16.5
|
62
|
+
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 6.
|
68
|
+
version: 6.16.5
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: webdrivers
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
76
|
-
type: :
|
75
|
+
version: '4.0'
|
76
|
+
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
82
|
+
version: '4.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activesupport
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '6.0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '6.0'
|
83
97
|
description: Run JSON-based recipes to scrap web sites.
|
84
98
|
email:
|
85
99
|
- hpneo@hotmail.com
|
@@ -87,7 +101,7 @@ executables: []
|
|
87
101
|
extensions: []
|
88
102
|
extra_rdoc_files: []
|
89
103
|
files:
|
90
|
-
- ".github/workflows/publish_gem.yml"
|
104
|
+
- ".github/workflows/run_tests.yml"
|
91
105
|
- ".gitignore"
|
92
106
|
- ".rspec"
|
93
107
|
- ".rubocop.yml"
|