paginae 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +11 -5
- data/Gemfile.lock +4 -4
- data/README.md +1 -0
- data/lib/paginae/attribute_injector.rb +10 -100
- data/lib/paginae/memoizer.rb +13 -0
- data/lib/paginae/node_builder.rb +33 -0
- data/lib/paginae/reader_builder/array.rb +28 -0
- data/lib/paginae/reader_builder/hash.rb +24 -0
- data/lib/paginae/reader_builder/text.rb +20 -0
- data/lib/paginae/reader_builder/value.rb +21 -0
- data/lib/paginae/reader_builder.rb +30 -0
- data/lib/paginae/space_striper.rb +17 -0
- data/lib/paginae/version.rb +1 -1
- data/lib/paginae.rb +4 -0
- metadata +10 -3
- data/.rubocop_todo.yml +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6fce7acf7bb8f43e9ca22ce7084e0b720811acff7fe28001261256357d93829a
|
4
|
+
data.tar.gz: ceaa9ed9c9c76ce777325b7de623157b1bbef37ee4c2d5aec0951bb2aa565085
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5158c1120fb6f0e3ce8021cee76297ddb5745c0d55a1706507602f39635f4a865d1bde0219199cfe8e97d36a05a444309ffbf686945eb2df8df650488764939b
|
7
|
+
data.tar.gz: 5f806d1ad1be0b8d5f8391a80f7b29ebe3901e494c1d005ca6c0da9c2de33f4982e5d135460570ebc9190e0bdb2eb5bdea5837cedadb5ccc7a506a2beea075f5
|
data/.rubocop.yml
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
-
inherit_from: .rubocop_todo.yml
|
2
|
-
|
3
1
|
AllCops:
|
4
2
|
TargetRubyVersion: 2.7
|
3
|
+
SuggestExtensions: false
|
5
4
|
NewCops: enable
|
6
5
|
|
6
|
+
Style/Documentation:
|
7
|
+
Enabled: false
|
8
|
+
|
7
9
|
Style/StringLiterals:
|
8
10
|
Enabled: true
|
9
11
|
EnforcedStyle: double_quotes
|
@@ -13,7 +15,11 @@ Style/StringLiteralsInInterpolation:
|
|
13
15
|
EnforcedStyle: double_quotes
|
14
16
|
|
15
17
|
Layout/LineLength:
|
16
|
-
|
18
|
+
Exclude:
|
19
|
+
- test/**/*.rb
|
17
20
|
|
18
|
-
|
19
|
-
|
21
|
+
Metrics/MethodLength:
|
22
|
+
Max: 16
|
23
|
+
|
24
|
+
Metrics/CyclomaticComplexity:
|
25
|
+
Max: 9
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
paginae (0.1.
|
4
|
+
paginae (0.1.4)
|
5
5
|
nokogiri (>= 1.13.4)
|
6
6
|
|
7
7
|
GEM
|
@@ -27,17 +27,17 @@ GEM
|
|
27
27
|
racc (1.6.0)
|
28
28
|
rainbow (3.1.1)
|
29
29
|
rake (13.0.6)
|
30
|
-
regexp_parser (2.
|
30
|
+
regexp_parser (2.5.0)
|
31
31
|
reline (0.3.1)
|
32
32
|
io-console (~> 0.5)
|
33
33
|
rexml (3.2.5)
|
34
|
-
rubocop (1.
|
34
|
+
rubocop (1.30.0)
|
35
35
|
parallel (~> 1.10)
|
36
36
|
parser (>= 3.1.0.0)
|
37
37
|
rainbow (>= 2.2.2, < 4.0)
|
38
38
|
regexp_parser (>= 1.8, < 3.0)
|
39
39
|
rexml (>= 3.2.5, < 4.0)
|
40
|
-
rubocop-ast (>= 1.
|
40
|
+
rubocop-ast (>= 1.18.0, < 2.0)
|
41
41
|
ruby-progressbar (~> 1.7)
|
42
42
|
unicode-display_width (>= 1.4.0, < 3.0)
|
43
43
|
rubocop-ast (1.18.0)
|
data/README.md
CHANGED
@@ -30,6 +30,7 @@ class ProductPage
|
|
30
30
|
attribute :name, id: "product-title"
|
31
31
|
attribute :model, xpath: "//h2[@class='product-model']"
|
32
32
|
attribute :summary, css: "#product-overview"
|
33
|
+
attribute :image_url, css: "#product-image", value: :src
|
33
34
|
attribute :colors, css: ".product-colors li", listed: true
|
34
35
|
attribute :sizes, css: ".product-sizes", listed: ->(e) { e.text.split(', ') }
|
35
36
|
attribute :prices, css: ".product-price", mapped: ->(price) { ["USD", price.text.gsub(/[^\d\.]/, '').to_f] }
|
data/lib/paginae/attribute_injector.rb
CHANGED
@@ -3,113 +3,23 @@
|
|
3
3
|
module Paginae
|
4
4
|
module AttributeInjector
|
5
5
|
def self.extended(base)
|
6
|
-
base.class_eval do
|
7
|
-
def data
|
8
|
-
instance_variables
|
9
|
-
.reject { |var| var.to_s =~ /_node/ }
|
10
|
-
.to_h { |var| [var.to_s.sub("@", "").to_sym, instance_variable_get(var)] }
|
11
|
-
.compact
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
6
|
class << base
|
7
|
+
include NodeBuilder
|
8
|
+
include ReaderBuilder
|
9
|
+
|
16
10
|
def attribute(name, **kwargs)
|
17
11
|
__define_node(name, **kwargs)
|
18
12
|
__define_reader(name, **kwargs)
|
13
|
+
instance_variable_set("@paginae_attributes", Set.new) unless instance_variable_defined?("@paginae_attributes")
|
14
|
+
instance_variable_get("@paginae_attributes").add(name.to_sym)
|
19
15
|
end
|
20
16
|
|
21
|
-
|
22
|
-
|
23
|
-
def __define_node(name, **kwargs)
|
24
|
-
selector = __node_selector(**kwargs)
|
25
|
-
define_method("__#{name}_nodes") do
|
26
|
-
instance_variable_get("@#{name}_nodes") || instance_variable_set("@#{name}_node", document.send(*selector))
|
27
|
-
end
|
28
|
-
private "__#{name}_nodes"
|
29
|
-
|
30
|
-
define_method("__#{name}_node") do
|
31
|
-
send("__#{name}_nodes").first
|
32
|
-
end
|
33
|
-
private "__#{name}_node"
|
34
|
-
end
|
35
|
-
|
36
|
-
def __node_selector(**kwargs)
|
37
|
-
if kwargs.key?(:css)
|
38
|
-
kwargs.slice(:css).to_a.flatten
|
39
|
-
elsif kwargs.key?(:xpath)
|
40
|
-
kwargs.slice(:xpath).to_a.flatten
|
41
|
-
elsif kwargs.key?(:id)
|
42
|
-
[:xpath, "//*[@id='#{kwargs[:id]}']"]
|
43
|
-
else
|
44
|
-
raise ArgumentError, "Undefined selector type"
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
def __define_reader(name, **kwargs)
|
49
|
-
if kwargs.key?(:mapped)
|
50
|
-
__define_map_reader(name, **kwargs)
|
51
|
-
elsif kwargs.key?(:listed)
|
52
|
-
__define_list_reader(name, **kwargs)
|
53
|
-
elsif kwargs.key?(:value)
|
54
|
-
__define_value_reader(name, **kwargs)
|
55
|
-
else
|
56
|
-
__define_text_reader(name)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
def __define_map_reader(name, **kwargs)
|
61
|
-
define_method name do
|
62
|
-
return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
|
63
|
-
|
64
|
-
map = send("__#{name}_nodes")&.to_h do |node|
|
65
|
-
case kwargs[:mapped]
|
66
|
-
when Symbol
|
67
|
-
send(kwargs[:mapped], node)
|
68
|
-
when Proc
|
69
|
-
kwargs[:mapped].call(node)
|
70
|
-
else
|
71
|
-
raise ArgumentError, "Invalid mapped type"
|
72
|
-
end
|
73
|
-
end
|
74
|
-
instance_variable_set("@#{name}", map&.compact)
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def __define_list_reader(name, **kwargs)
|
79
|
-
define_method name do
|
80
|
-
return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
|
81
|
-
|
82
|
-
list = send("__#{name}_nodes")&.map do |node|
|
83
|
-
case kwargs[:listed]
|
84
|
-
when TrueClass
|
85
|
-
node&.text&.strip
|
86
|
-
when Symbol
|
87
|
-
send(kwargs[:listed], node)
|
88
|
-
when Proc
|
89
|
-
kwargs[:listed].call(node)
|
90
|
-
else
|
91
|
-
raise ArgumentError, "Invalid listed type"
|
92
|
-
end
|
93
|
-
end
|
94
|
-
instance_variable_set("@#{name}", list&.compact)
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
def __define_text_reader(name)
|
99
|
-
define_method name do
|
100
|
-
return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
|
101
|
-
|
102
|
-
instance_variable_set("@#{name}", send("__#{name}_node")&.text&.gsub(/\s+/, " ")&.strip)
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
def __define_value_reader(name, **kwargs)
|
107
|
-
define_method name do
|
108
|
-
return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
|
17
|
+
attr_reader :paginae_attributes
|
18
|
+
end
|
109
19
|
|
110
|
-
|
111
|
-
|
112
|
-
|
20
|
+
base.class_eval do
|
21
|
+
def data
|
22
|
+
self.class.paginae_attributes.to_h { |key| [key, send(key)] }
|
113
23
|
end
|
114
24
|
end
|
115
25
|
end
|
data/lib/paginae/memoizer.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module Memoizer
|
5
|
+
def define_memoized_method(name)
|
6
|
+
define_method name do
|
7
|
+
return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
|
8
|
+
|
9
|
+
instance_variable_set("@#{name}", yield(self))
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/lib/paginae/node_builder.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module NodeBuilder
|
5
|
+
include Memoizer
|
6
|
+
|
7
|
+
private
|
8
|
+
|
9
|
+
def __define_node(name, **kwargs)
|
10
|
+
define_memoized_method("__#{name}_nodes") do |instance|
|
11
|
+
instance.document.send(*__node_selector(**kwargs))
|
12
|
+
end
|
13
|
+
private "__#{name}_nodes"
|
14
|
+
|
15
|
+
define_method("__#{name}_node") do
|
16
|
+
send("__#{name}_nodes").first
|
17
|
+
end
|
18
|
+
private "__#{name}_node"
|
19
|
+
end
|
20
|
+
|
21
|
+
def __node_selector(css: nil, xpath: nil, id: nil, **kwargs)
|
22
|
+
if css
|
23
|
+
[:css, css]
|
24
|
+
elsif xpath
|
25
|
+
[:xpath, xpath]
|
26
|
+
elsif id
|
27
|
+
[:xpath, "//*[@id='#{id}']"]
|
28
|
+
else
|
29
|
+
raise ArgumentError, "Undefined selector type for #{kwargs}"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
data/lib/paginae/reader_builder/array.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module ReaderBuilder
|
5
|
+
module Array
|
6
|
+
using SpaceStriper
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def __define_list_reader(name, method)
|
11
|
+
define_memoized_method name do |instance|
|
12
|
+
instance.send("__#{name}_nodes")&.map do |node|
|
13
|
+
case method
|
14
|
+
when TrueClass
|
15
|
+
node&.text&.space_strip
|
16
|
+
when Symbol
|
17
|
+
instance.send(method, node)
|
18
|
+
when Proc
|
19
|
+
method.call(node)
|
20
|
+
else
|
21
|
+
raise ArgumentError, "Invalid listed type for #{method.class}"
|
22
|
+
end
|
23
|
+
end&.compact
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/paginae/reader_builder/hash.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module ReaderBuilder
|
5
|
+
module Hash
|
6
|
+
private
|
7
|
+
|
8
|
+
def __define_hash_reader(name, method)
|
9
|
+
define_memoized_method name do |instance|
|
10
|
+
instance.send("__#{name}_nodes")&.to_h do |node|
|
11
|
+
case method
|
12
|
+
when Symbol
|
13
|
+
instance.send(method, node)
|
14
|
+
when Proc
|
15
|
+
method.call(node)
|
16
|
+
else
|
17
|
+
raise ArgumentError, "Invalid mapped type for #{method.class}"
|
18
|
+
end
|
19
|
+
end&.compact
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/paginae/reader_builder/text.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module ReaderBuilder
|
5
|
+
module Text
|
6
|
+
using SpaceStriper
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def __define_text_reader(name)
|
11
|
+
define_memoized_method name do |instance|
|
12
|
+
instance
|
13
|
+
.send("__#{name}_node")
|
14
|
+
&.text
|
15
|
+
&.space_strip
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/paginae/reader_builder/value.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Paginae
|
4
|
+
module ReaderBuilder
|
5
|
+
module Value
|
6
|
+
using SpaceStriper
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def __define_value_reader(name, value)
|
11
|
+
define_memoized_method name do |instance|
|
12
|
+
instance
|
13
|
+
.send("__#{name}_node")
|
14
|
+
&.attribute(value.to_s)
|
15
|
+
&.value
|
16
|
+
&.space_strip
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/paginae/reader_builder.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "reader_builder/array"
|
4
|
+
require_relative "reader_builder/hash"
|
5
|
+
require_relative "reader_builder/text"
|
6
|
+
require_relative "reader_builder/value"
|
7
|
+
|
8
|
+
module Paginae
|
9
|
+
module ReaderBuilder
|
10
|
+
include Memoizer
|
11
|
+
include Array
|
12
|
+
include Hash
|
13
|
+
include Text
|
14
|
+
include Value
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def __define_reader(name, mapped: nil, listed: nil, value: nil, **_kwargs)
|
19
|
+
if mapped
|
20
|
+
__define_hash_reader(name, mapped)
|
21
|
+
elsif listed
|
22
|
+
__define_list_reader(name, listed)
|
23
|
+
elsif value
|
24
|
+
__define_value_reader(name, value)
|
25
|
+
else
|
26
|
+
__define_text_reader(name)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/paginae/version.rb
CHANGED
data/lib/paginae.rb
CHANGED
@@ -2,8 +2,12 @@
|
|
2
2
|
|
3
3
|
require "nokogiri"
|
4
4
|
require_relative "paginae/version"
|
5
|
+
require_relative "paginae/space_striper"
|
6
|
+
require_relative "paginae/memoizer"
|
5
7
|
require_relative "paginae/html_initializer_injector"
|
6
8
|
require_relative "paginae/attribute_injector"
|
9
|
+
require_relative "paginae/node_builder"
|
10
|
+
require_relative "paginae/reader_builder"
|
7
11
|
require_relative "paginae/web"
|
8
12
|
|
9
13
|
module Paginae
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: paginae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis Ezcurdia
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -32,7 +32,6 @@ extensions: []
|
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
34
|
- ".rubocop.yml"
|
35
|
-
- ".rubocop_todo.yml"
|
36
35
|
- CHANGELOG.md
|
37
36
|
- CODE_OF_CONDUCT.md
|
38
37
|
- Gemfile
|
@@ -43,6 +42,14 @@ files:
|
|
43
42
|
- lib/paginae.rb
|
44
43
|
- lib/paginae/attribute_injector.rb
|
45
44
|
- lib/paginae/html_initializer_injector.rb
|
45
|
+
- lib/paginae/memoizer.rb
|
46
|
+
- lib/paginae/node_builder.rb
|
47
|
+
- lib/paginae/reader_builder.rb
|
48
|
+
- lib/paginae/reader_builder/array.rb
|
49
|
+
- lib/paginae/reader_builder/hash.rb
|
50
|
+
- lib/paginae/reader_builder/text.rb
|
51
|
+
- lib/paginae/reader_builder/value.rb
|
52
|
+
- lib/paginae/space_striper.rb
|
46
53
|
- lib/paginae/version.rb
|
47
54
|
- lib/paginae/web.rb
|
48
55
|
- sig/paginae.rbs
|
data/.rubocop_todo.yml
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# This configuration was generated by
|
2
|
-
# `rubocop --auto-gen-config`
|
3
|
-
# on 2022-04-29 18:20:44 UTC using RuboCop version 1.28.2.
|
4
|
-
# The point is for the user to remove these configuration records
|
5
|
-
# one by one as the offenses are removed from the code base.
|
6
|
-
# Note that changes in the inspected code, or installation of new
|
7
|
-
# versions of RuboCop, may require this file to be generated again.
|
8
|
-
|
9
|
-
# Offense count: 1
|
10
|
-
# Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
|
11
|
-
Metrics/AbcSize:
|
12
|
-
Exclude:
|
13
|
-
- lib/paginae/attribute_injector.rb
|
14
|
-
|
15
|
-
# Offense count: 1
|
16
|
-
# Configuration parameters: IgnoredMethods.
|
17
|
-
Metrics/CyclomaticComplexity:
|
18
|
-
Max: 27
|
19
|
-
|
20
|
-
# Offense count: 1
|
21
|
-
# Configuration parameters: IgnoredMethods.
|
22
|
-
Metrics/PerceivedComplexity:
|
23
|
-
Max: 27
|
24
|
-
|
25
|
-
Metrics/MethodLength:
|
26
|
-
Exclude:
|
27
|
-
- lib/paginae/attribute_injector.rb
|