paginae 0.1.2 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +11 -5
- data/Gemfile +3 -4
- data/Gemfile.lock +8 -8
- data/README.md +1 -0
- data/lib/paginae/attribute_injector.rb +10 -89
- data/lib/paginae/memoizer.rb +13 -0
- data/lib/paginae/node_builder.rb +33 -0
- data/lib/paginae/reader_builder/array.rb +28 -0
- data/lib/paginae/reader_builder/hash.rb +24 -0
- data/lib/paginae/reader_builder/text.rb +20 -0
- data/lib/paginae/reader_builder/value.rb +21 -0
- data/lib/paginae/reader_builder.rb +30 -0
- data/lib/paginae/space_striper.rb +17 -0
- data/lib/paginae/version.rb +1 -1
- data/lib/paginae.rb +4 -0
- metadata +10 -3
- data/.rubocop_todo.yml +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6fce7acf7bb8f43e9ca22ce7084e0b720811acff7fe28001261256357d93829a
|
4
|
+
data.tar.gz: ceaa9ed9c9c76ce777325b7de623157b1bbef37ee4c2d5aec0951bb2aa565085
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5158c1120fb6f0e3ce8021cee76297ddb5745c0d55a1706507602f39635f4a865d1bde0219199cfe8e97d36a05a444309ffbf686945eb2df8df650488764939b
|
7
|
+
data.tar.gz: 5f806d1ad1be0b8d5f8391a80f7b29ebe3901e494c1d005ca6c0da9c2de33f4982e5d135460570ebc9190e0bdb2eb5bdea5837cedadb5ccc7a506a2beea075f5
|
data/.rubocop.yml
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
-
inherit_from: .rubocop_todo.yml
|
2
|
-
|
3
1
|
AllCops:
|
4
2
|
TargetRubyVersion: 2.7
|
3
|
+
SuggestExtensions: false
|
5
4
|
NewCops: enable
|
6
5
|
|
6
|
+
Style/Documentation:
|
7
|
+
Enabled: false
|
8
|
+
|
7
9
|
Style/StringLiterals:
|
8
10
|
Enabled: true
|
9
11
|
EnforcedStyle: double_quotes
|
@@ -13,7 +15,11 @@ Style/StringLiteralsInInterpolation:
|
|
13
15
|
EnforcedStyle: double_quotes
|
14
16
|
|
15
17
|
Layout/LineLength:
|
16
|
-
|
18
|
+
Exclude:
|
19
|
+
- test/**/*.rb
|
17
20
|
|
18
|
-
|
19
|
-
|
21
|
+
Metrics/MethodLength:
|
22
|
+
Max: 16
|
23
|
+
|
24
|
+
Metrics/CyclomaticComplexity:
|
25
|
+
Max: 9
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
paginae (0.1.
|
4
|
+
paginae (0.1.4)
|
5
5
|
nokogiri (>= 1.13.4)
|
6
6
|
|
7
7
|
GEM
|
@@ -27,20 +27,20 @@ GEM
|
|
27
27
|
racc (1.6.0)
|
28
28
|
rainbow (3.1.1)
|
29
29
|
rake (13.0.6)
|
30
|
-
regexp_parser (2.
|
30
|
+
regexp_parser (2.5.0)
|
31
31
|
reline (0.3.1)
|
32
32
|
io-console (~> 0.5)
|
33
33
|
rexml (3.2.5)
|
34
|
-
rubocop (1.
|
34
|
+
rubocop (1.30.0)
|
35
35
|
parallel (~> 1.10)
|
36
36
|
parser (>= 3.1.0.0)
|
37
37
|
rainbow (>= 2.2.2, < 4.0)
|
38
38
|
regexp_parser (>= 1.8, < 3.0)
|
39
|
-
rexml
|
40
|
-
rubocop-ast (>= 1.
|
39
|
+
rexml (>= 3.2.5, < 4.0)
|
40
|
+
rubocop-ast (>= 1.18.0, < 2.0)
|
41
41
|
ruby-progressbar (~> 1.7)
|
42
42
|
unicode-display_width (>= 1.4.0, < 3.0)
|
43
|
-
rubocop-ast (1.
|
43
|
+
rubocop-ast (1.18.0)
|
44
44
|
parser (>= 3.1.1.0)
|
45
45
|
ruby-progressbar (1.11.0)
|
46
46
|
unicode-display_width (2.1.0)
|
@@ -52,10 +52,10 @@ PLATFORMS
|
|
52
52
|
|
53
53
|
DEPENDENCIES
|
54
54
|
debug (~> 1.5)
|
55
|
-
minitest (~> 5.
|
55
|
+
minitest (~> 5.15)
|
56
56
|
paginae!
|
57
57
|
rake (~> 13.0)
|
58
|
-
rubocop (~> 1.
|
58
|
+
rubocop (~> 1.29)
|
59
59
|
|
60
60
|
BUNDLED WITH
|
61
61
|
2.3.7
|
data/README.md
CHANGED
@@ -30,6 +30,7 @@ class ProductPage
|
|
30
30
|
attribute :name, id: "product-title"
|
31
31
|
attribute :model, xpath: "//h2[@class='product-model']"
|
32
32
|
attribute :summary, css: "#product-overview"
|
33
|
+
attribute :image_url, css: "#product-image", value: :src
|
33
34
|
attribute :colors, css: ".product-colors li", listed: true
|
34
35
|
attribute :sizes, css: ".product-sizes", listed: ->(e) { e.text.split(', ') }
|
35
36
|
attribute :prices, css: ".product-price", mapped: ->(price) { ["USD", price.text.gsub(/[^\d\.]/, '').to_f] }
|
@@ -3,102 +3,23 @@
|
|
3
3
|
module Paginae
|
4
4
|
module AttributeInjector
|
5
5
|
def self.extended(base)
|
6
|
-
base.class_eval do
|
7
|
-
def data
|
8
|
-
instance_variables
|
9
|
-
.reject { |var| var.to_s =~ /_node/ }
|
10
|
-
.to_h { |var| [var.to_s.sub("@", "").to_sym, instance_variable_get(var)] }
|
11
|
-
.compact
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
6
|
class << base
|
7
|
+
include NodeBuilder
|
8
|
+
include ReaderBuilder
|
9
|
+
|
16
10
|
def attribute(name, **kwargs)
|
17
11
|
__define_node(name, **kwargs)
|
18
12
|
__define_reader(name, **kwargs)
|
13
|
+
instance_variable_set("@paginae_attributes", Set.new) unless instance_variable_defined?("@paginae_attributes")
|
14
|
+
instance_variable_get("@paginae_attributes").add(name.to_sym)
|
19
15
|
end
|
20
16
|
|
21
|
-
|
22
|
-
|
23
|
-
def __define_node(name, **kwargs)
|
24
|
-
selector = __node_selector(**kwargs)
|
25
|
-
define_method("__#{name}_nodes") do
|
26
|
-
instance_variable_get("@#{name}_nodes") || instance_variable_set("@#{name}_node", document.send(*selector))
|
27
|
-
end
|
28
|
-
private "__#{name}_nodes"
|
29
|
-
|
30
|
-
define_method("__#{name}_node") do
|
31
|
-
send("__#{name}_nodes").first
|
32
|
-
end
|
33
|
-
private "__#{name}_node"
|
34
|
-
end
|
35
|
-
|
36
|
-
def __node_selector(**kwargs)
|
37
|
-
if kwargs.key?(:css)
|
38
|
-
kwargs.slice(:css).to_a.flatten
|
39
|
-
elsif kwargs.key?(:xpath)
|
40
|
-
kwargs.slice(:xpath).to_a.flatten
|
41
|
-
elsif kwargs.key?(:id)
|
42
|
-
[:xpath, "//*[@id='#{kwargs[:id]}']"]
|
43
|
-
else
|
44
|
-
raise ArgumentError, "Undefined selector type"
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
def __define_reader(name, **kwargs)
|
49
|
-
if kwargs.key?(:mapped)
|
50
|
-
__define_map_reader(name, **kwargs)
|
51
|
-
elsif kwargs.key?(:listed)
|
52
|
-
__define_list_reader(name, **kwargs)
|
53
|
-
else
|
54
|
-
__define_text_reader(name)
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def __define_map_reader(name, **kwargs)
|
59
|
-
define_method name do
|
60
|
-
return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
|
61
|
-
|
62
|
-
map = send("__#{name}_nodes")&.to_h do |node|
|
63
|
-
case kwargs[:mapped]
|
64
|
-
when Symbol
|
65
|
-
send(kwargs[:mapped], node)
|
66
|
-
when Proc
|
67
|
-
kwargs[:mapped].call(node)
|
68
|
-
else
|
69
|
-
raise ArgumentError, "Invalid mapped type"
|
70
|
-
end
|
71
|
-
end
|
72
|
-
instance_variable_set("@#{name}", map&.compact)
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def __define_list_reader(name, **kwargs)
|
77
|
-
define_method name do
|
78
|
-
return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
|
79
|
-
|
80
|
-
list = send("__#{name}_nodes")&.map do |node|
|
81
|
-
case kwargs[:listed]
|
82
|
-
when TrueClass
|
83
|
-
node&.text
|
84
|
-
when Symbol
|
85
|
-
send(kwargs[:listed], node)
|
86
|
-
when Proc
|
87
|
-
kwargs[:listed].call(node)
|
88
|
-
else
|
89
|
-
raise ArgumentError, "Invalid listed type"
|
90
|
-
end
|
91
|
-
end
|
92
|
-
instance_variable_set("@#{name}", list&.compact)
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
def __define_text_reader(name)
|
97
|
-
define_method name do
|
98
|
-
return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
|
17
|
+
attr_reader :paginae_attributes
|
18
|
+
end
|
99
19
|
|
100
|
-
|
101
|
-
|
20
|
+
base.class_eval do
|
21
|
+
def data
|
22
|
+
self.class.paginae_attributes.to_h { |key| [key, send(key)] }
|
102
23
|
end
|
103
24
|
end
|
104
25
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module Memoizer
|
5
|
+
def define_memoized_method(name)
|
6
|
+
define_method name do
|
7
|
+
return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
|
8
|
+
|
9
|
+
instance_variable_set("@#{name}", yield(self))
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module NodeBuilder
|
5
|
+
include Memoizer
|
6
|
+
|
7
|
+
private
|
8
|
+
|
9
|
+
def __define_node(name, **kwargs)
|
10
|
+
define_memoized_method("__#{name}_nodes") do |instance|
|
11
|
+
instance.document.send(*__node_selector(**kwargs))
|
12
|
+
end
|
13
|
+
private "__#{name}_nodes"
|
14
|
+
|
15
|
+
define_method("__#{name}_node") do
|
16
|
+
send("__#{name}_nodes").first
|
17
|
+
end
|
18
|
+
private "__#{name}_node"
|
19
|
+
end
|
20
|
+
|
21
|
+
def __node_selector(css: nil, xpath: nil, id: nil, **kwargs)
|
22
|
+
if css
|
23
|
+
[:css, css]
|
24
|
+
elsif xpath
|
25
|
+
[:xpath, xpath]
|
26
|
+
elsif id
|
27
|
+
[:xpath, "//*[@id='#{id}']"]
|
28
|
+
else
|
29
|
+
raise ArgumentError, "Undefined selector type for #{kwargs}"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module ReaderBuilder
|
5
|
+
module Array
|
6
|
+
using SpaceStriper
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def __define_list_reader(name, method)
|
11
|
+
define_memoized_method name do |instance|
|
12
|
+
instance.send("__#{name}_nodes")&.map do |node|
|
13
|
+
case method
|
14
|
+
when TrueClass
|
15
|
+
node&.text&.space_strip
|
16
|
+
when Symbol
|
17
|
+
instance.send(method, node)
|
18
|
+
when Proc
|
19
|
+
method.call(node)
|
20
|
+
else
|
21
|
+
raise ArgumentError, "Invalid listed type for #{method.class}"
|
22
|
+
end
|
23
|
+
end&.compact
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module ReaderBuilder
|
5
|
+
module Hash
|
6
|
+
private
|
7
|
+
|
8
|
+
def __define_hash_reader(name, method)
|
9
|
+
define_memoized_method name do |instance|
|
10
|
+
instance.send("__#{name}_nodes")&.to_h do |node|
|
11
|
+
case method
|
12
|
+
when Symbol
|
13
|
+
instance.send(method, node)
|
14
|
+
when Proc
|
15
|
+
method.call(node)
|
16
|
+
else
|
17
|
+
raise ArgumentError, "Invalid mapped type for #{method.class}"
|
18
|
+
end
|
19
|
+
end&.compact
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module ReaderBuilder
|
5
|
+
module Text
|
6
|
+
using SpaceStriper
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def __define_text_reader(name)
|
11
|
+
define_memoized_method name do |instance|
|
12
|
+
instance
|
13
|
+
.send("__#{name}_node")
|
14
|
+
&.text
|
15
|
+
&.space_strip
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module ReaderBuilder
|
5
|
+
module Value
|
6
|
+
using SpaceStriper
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def __define_value_reader(name, value)
|
11
|
+
define_memoized_method name do |instance|
|
12
|
+
instance
|
13
|
+
.send("__#{name}_node")
|
14
|
+
&.attribute(value.to_s)
|
15
|
+
&.value
|
16
|
+
&.space_strip
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "reader_builder/array"
|
4
|
+
require_relative "reader_builder/hash"
|
5
|
+
require_relative "reader_builder/text"
|
6
|
+
require_relative "reader_builder/value"
|
7
|
+
|
8
|
+
module Paginae
|
9
|
+
module ReaderBuilder
|
10
|
+
include Memoizer
|
11
|
+
include Array
|
12
|
+
include Hash
|
13
|
+
include Text
|
14
|
+
include Value
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def __define_reader(name, mapped: nil, listed: nil, value: nil, **_kwargs)
|
19
|
+
if mapped
|
20
|
+
__define_hash_reader(name, mapped)
|
21
|
+
elsif listed
|
22
|
+
__define_list_reader(name, listed)
|
23
|
+
elsif value
|
24
|
+
__define_value_reader(name, value)
|
25
|
+
else
|
26
|
+
__define_text_reader(name)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/paginae/version.rb
CHANGED
data/lib/paginae.rb
CHANGED
@@ -2,8 +2,12 @@
|
|
2
2
|
|
3
3
|
require "nokogiri"
|
4
4
|
require_relative "paginae/version"
|
5
|
+
require_relative "paginae/space_striper"
|
6
|
+
require_relative "paginae/memoizer"
|
5
7
|
require_relative "paginae/html_initializer_injector"
|
6
8
|
require_relative "paginae/attribute_injector"
|
9
|
+
require_relative "paginae/node_builder"
|
10
|
+
require_relative "paginae/reader_builder"
|
7
11
|
require_relative "paginae/web"
|
8
12
|
|
9
13
|
module Paginae
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: paginae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis Ezcurdia
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -32,7 +32,6 @@ extensions: []
|
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
34
|
- ".rubocop.yml"
|
35
|
-
- ".rubocop_todo.yml"
|
36
35
|
- CHANGELOG.md
|
37
36
|
- CODE_OF_CONDUCT.md
|
38
37
|
- Gemfile
|
@@ -43,6 +42,14 @@ files:
|
|
43
42
|
- lib/paginae.rb
|
44
43
|
- lib/paginae/attribute_injector.rb
|
45
44
|
- lib/paginae/html_initializer_injector.rb
|
45
|
+
- lib/paginae/memoizer.rb
|
46
|
+
- lib/paginae/node_builder.rb
|
47
|
+
- lib/paginae/reader_builder.rb
|
48
|
+
- lib/paginae/reader_builder/array.rb
|
49
|
+
- lib/paginae/reader_builder/hash.rb
|
50
|
+
- lib/paginae/reader_builder/text.rb
|
51
|
+
- lib/paginae/reader_builder/value.rb
|
52
|
+
- lib/paginae/space_striper.rb
|
46
53
|
- lib/paginae/version.rb
|
47
54
|
- lib/paginae/web.rb
|
48
55
|
- sig/paginae.rbs
|
data/.rubocop_todo.yml
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# This configuration was generated by
|
2
|
-
# `rubocop --auto-gen-config`
|
3
|
-
# on 2022-04-29 18:20:44 UTC using RuboCop version 1.28.2.
|
4
|
-
# The point is for the user to remove these configuration records
|
5
|
-
# one by one as the offenses are removed from the code base.
|
6
|
-
# Note that changes in the inspected code, or installation of new
|
7
|
-
# versions of RuboCop, may require this file to be generated again.
|
8
|
-
|
9
|
-
# Offense count: 1
|
10
|
-
# Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
|
11
|
-
Metrics/AbcSize:
|
12
|
-
Exclude:
|
13
|
-
- lib/paginae/attribute_injector.rb
|
14
|
-
|
15
|
-
# Offense count: 1
|
16
|
-
# Configuration parameters: IgnoredMethods.
|
17
|
-
Metrics/CyclomaticComplexity:
|
18
|
-
Max: 27
|
19
|
-
|
20
|
-
# Offense count: 1
|
21
|
-
# Configuration parameters: IgnoredMethods.
|
22
|
-
Metrics/PerceivedComplexity:
|
23
|
-
Max: 27
|
24
|
-
|
25
|
-
Metrics/MethodLength:
|
26
|
-
Exclude:
|
27
|
-
- lib/paginae/attribute_injector.rb
|