paginae 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 51b81660d03e3fb92a5cb8ab12995813ee979be57ce6b3a2971acab57e590418
4
- data.tar.gz: ef8989a64d197557315a0f7390bd14b8279ead060f6187f7b6fdb7b99f0f2fe1
3
+ metadata.gz: 6fce7acf7bb8f43e9ca22ce7084e0b720811acff7fe28001261256357d93829a
4
+ data.tar.gz: ceaa9ed9c9c76ce777325b7de623157b1bbef37ee4c2d5aec0951bb2aa565085
5
5
  SHA512:
6
- metadata.gz: d42ea335bf766e4eaf371036b515e92792f86fa3c80a23355cff2c63b8e280efaa5a3b76f7d0e7cfabd11f2e914f843a1a5e2e8156af37ca83a15f7257fa944a
7
- data.tar.gz: 1338900ec4e6ebbd83917824610bfdb7c65d9cac3150dcc530e931e03e324f005a1a197fb2f5118895fbbf84ceb15d256b1ba64a3aba2edd134fd0b680067a20
6
+ metadata.gz: 5158c1120fb6f0e3ce8021cee76297ddb5745c0d55a1706507602f39635f4a865d1bde0219199cfe8e97d36a05a444309ffbf686945eb2df8df650488764939b
7
+ data.tar.gz: 5f806d1ad1be0b8d5f8391a80f7b29ebe3901e494c1d005ca6c0da9c2de33f4982e5d135460570ebc9190e0bdb2eb5bdea5837cedadb5ccc7a506a2beea075f5
data/.rubocop.yml CHANGED
@@ -1,9 +1,11 @@
1
- inherit_from: .rubocop_todo.yml
2
-
3
1
  AllCops:
4
2
  TargetRubyVersion: 2.7
3
+ SuggestExtensions: false
5
4
  NewCops: enable
6
5
 
6
+ Style/Documentation:
7
+ Enabled: false
8
+
7
9
  Style/StringLiterals:
8
10
  Enabled: true
9
11
  EnforcedStyle: double_quotes
@@ -13,7 +15,11 @@ Style/StringLiteralsInInterpolation:
13
15
  EnforcedStyle: double_quotes
14
16
 
15
17
  Layout/LineLength:
16
- Max: 130
18
+ Exclude:
19
+ - test/**/*.rb
17
20
 
18
- Style/Documentation:
19
- Enabled: false
21
+ Metrics/MethodLength:
22
+ Max: 16
23
+
24
+ Metrics/CyclomaticComplexity:
25
+ Max: 9
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- paginae (0.1.2)
4
+ paginae (0.1.4)
5
5
  nokogiri (>= 1.13.4)
6
6
 
7
7
  GEM
@@ -27,17 +27,17 @@ GEM
27
27
  racc (1.6.0)
28
28
  rainbow (3.1.1)
29
29
  rake (13.0.6)
30
- regexp_parser (2.4.0)
30
+ regexp_parser (2.5.0)
31
31
  reline (0.3.1)
32
32
  io-console (~> 0.5)
33
33
  rexml (3.2.5)
34
- rubocop (1.29.1)
34
+ rubocop (1.30.0)
35
35
  parallel (~> 1.10)
36
36
  parser (>= 3.1.0.0)
37
37
  rainbow (>= 2.2.2, < 4.0)
38
38
  regexp_parser (>= 1.8, < 3.0)
39
39
  rexml (>= 3.2.5, < 4.0)
40
- rubocop-ast (>= 1.17.0, < 2.0)
40
+ rubocop-ast (>= 1.18.0, < 2.0)
41
41
  ruby-progressbar (~> 1.7)
42
42
  unicode-display_width (>= 1.4.0, < 3.0)
43
43
  rubocop-ast (1.18.0)
data/README.md CHANGED
@@ -30,6 +30,7 @@ class ProductPage
30
30
  attribute :name, id: "product-title"
31
31
  attribute :model, xpath: "//h2[@class='product-model']"
32
32
  attribute :summary, css: "#product-overview"
33
+ attribute :image_url, css: "#product-image", value: :src
33
34
  attribute :colors, css: ".product-colors li", listed: true
34
35
  attribute :sizes, css: ".product-sizes", listed: ->(e) { e.text.split(', ') }
35
36
  attribute :prices, css: ".product-price", mapped: ->(price) { ["USD", price.text.gsub(/[^\d\.]/, '').to_f] }
@@ -3,113 +3,23 @@
3
3
  module Paginae
4
4
  module AttributeInjector
5
5
  def self.extended(base)
6
- base.class_eval do
7
- def data
8
- instance_variables
9
- .reject { |var| var.to_s =~ /_node/ }
10
- .to_h { |var| [var.to_s.sub("@", "").to_sym, instance_variable_get(var)] }
11
- .compact
12
- end
13
- end
14
-
15
6
  class << base
7
+ include NodeBuilder
8
+ include ReaderBuilder
9
+
16
10
  def attribute(name, **kwargs)
17
11
  __define_node(name, **kwargs)
18
12
  __define_reader(name, **kwargs)
13
+ instance_variable_set("@paginae_attributes", Set.new) unless instance_variable_defined?("@paginae_attributes")
14
+ instance_variable_get("@paginae_attributes").add(name.to_sym)
19
15
  end
20
16
 
21
- private
22
-
23
- def __define_node(name, **kwargs)
24
- selector = __node_selector(**kwargs)
25
- define_method("__#{name}_nodes") do
26
- instance_variable_get("@#{name}_nodes") || instance_variable_set("@#{name}_node", document.send(*selector))
27
- end
28
- private "__#{name}_nodes"
29
-
30
- define_method("__#{name}_node") do
31
- send("__#{name}_nodes").first
32
- end
33
- private "__#{name}_node"
34
- end
35
-
36
- def __node_selector(**kwargs)
37
- if kwargs.key?(:css)
38
- kwargs.slice(:css).to_a.flatten
39
- elsif kwargs.key?(:xpath)
40
- kwargs.slice(:xpath).to_a.flatten
41
- elsif kwargs.key?(:id)
42
- [:xpath, "//*[@id='#{kwargs[:id]}']"]
43
- else
44
- raise ArgumentError, "Undefined selector type"
45
- end
46
- end
47
-
48
- def __define_reader(name, **kwargs)
49
- if kwargs.key?(:mapped)
50
- __define_map_reader(name, **kwargs)
51
- elsif kwargs.key?(:listed)
52
- __define_list_reader(name, **kwargs)
53
- elsif kwargs.key?(:value)
54
- __define_value_reader(name, **kwargs)
55
- else
56
- __define_text_reader(name)
57
- end
58
- end
59
-
60
- def __define_map_reader(name, **kwargs)
61
- define_method name do
62
- return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
63
-
64
- map = send("__#{name}_nodes")&.to_h do |node|
65
- case kwargs[:mapped]
66
- when Symbol
67
- send(kwargs[:mapped], node)
68
- when Proc
69
- kwargs[:mapped].call(node)
70
- else
71
- raise ArgumentError, "Invalid mapped type"
72
- end
73
- end
74
- instance_variable_set("@#{name}", map&.compact)
75
- end
76
- end
77
-
78
- def __define_list_reader(name, **kwargs)
79
- define_method name do
80
- return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
81
-
82
- list = send("__#{name}_nodes")&.map do |node|
83
- case kwargs[:listed]
84
- when TrueClass
85
- node&.text&.strip
86
- when Symbol
87
- send(kwargs[:listed], node)
88
- when Proc
89
- kwargs[:listed].call(node)
90
- else
91
- raise ArgumentError, "Invalid listed type"
92
- end
93
- end
94
- instance_variable_set("@#{name}", list&.compact)
95
- end
96
- end
97
-
98
- def __define_text_reader(name)
99
- define_method name do
100
- return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
101
-
102
- instance_variable_set("@#{name}", send("__#{name}_node")&.text&.gsub(/\s+/, " ")&.strip)
103
- end
104
- end
105
-
106
- def __define_value_reader(name, **kwargs)
107
- define_method name do
108
- return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
17
+ attr_reader :paginae_attributes
18
+ end
109
19
 
110
- instance_variable_set("@#{name}",
111
- send("__#{name}_node")&.attribute(kwargs[:value].to_s)&.value&.gsub(/\s+/, " ")&.strip)
112
- end
20
+ base.class_eval do
21
+ def data
22
+ self.class.paginae_attributes.to_h { |key| [key, send(key)] }
113
23
  end
114
24
  end
115
25
  end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Paginae
4
+ module Memoizer
5
+ def define_memoized_method(name)
6
+ define_method name do
7
+ return instance_variable_get("@#{name}") if instance_variable_defined?("@#{name}")
8
+
9
+ instance_variable_set("@#{name}", yield(self))
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Paginae
4
+ module NodeBuilder
5
+ include Memoizer
6
+
7
+ private
8
+
9
+ def __define_node(name, **kwargs)
10
+ define_memoized_method("__#{name}_nodes") do |instance|
11
+ instance.document.send(*__node_selector(**kwargs))
12
+ end
13
+ private "__#{name}_nodes"
14
+
15
+ define_method("__#{name}_node") do
16
+ send("__#{name}_nodes").first
17
+ end
18
+ private "__#{name}_node"
19
+ end
20
+
21
+ def __node_selector(css: nil, xpath: nil, id: nil, **kwargs)
22
+ if css
23
+ [:css, css]
24
+ elsif xpath
25
+ [:xpath, xpath]
26
+ elsif id
27
+ [:xpath, "//*[@id='#{id}']"]
28
+ else
29
+ raise ArgumentError, "Undefined selector type for #{kwargs}"
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Paginae
4
+ module ReaderBuilder
5
+ module Array
6
+ using SpaceStriper
7
+
8
+ private
9
+
10
+ def __define_list_reader(name, method)
11
+ define_memoized_method name do |instance|
12
+ instance.send("__#{name}_nodes")&.map do |node|
13
+ case method
14
+ when TrueClass
15
+ node&.text&.space_strip
16
+ when Symbol
17
+ instance.send(method, node)
18
+ when Proc
19
+ method.call(node)
20
+ else
21
+ raise ArgumentError, "Invalid listed type for #{method.class}"
22
+ end
23
+ end&.compact
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Paginae
4
+ module ReaderBuilder
5
+ module Hash
6
+ private
7
+
8
+ def __define_hash_reader(name, method)
9
+ define_memoized_method name do |instance|
10
+ instance.send("__#{name}_nodes")&.to_h do |node|
11
+ case method
12
+ when Symbol
13
+ instance.send(method, node)
14
+ when Proc
15
+ method.call(node)
16
+ else
17
+ raise ArgumentError, "Invalid mapped type for #{method.class}"
18
+ end
19
+ end&.compact
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Paginae
4
+ module ReaderBuilder
5
+ module Text
6
+ using SpaceStriper
7
+
8
+ private
9
+
10
+ def __define_text_reader(name)
11
+ define_memoized_method name do |instance|
12
+ instance
13
+ .send("__#{name}_node")
14
+ &.text
15
+ &.space_strip
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Paginae
4
+ module ReaderBuilder
5
+ module Value
6
+ using SpaceStriper
7
+
8
+ private
9
+
10
+ def __define_value_reader(name, value)
11
+ define_memoized_method name do |instance|
12
+ instance
13
+ .send("__#{name}_node")
14
+ &.attribute(value.to_s)
15
+ &.value
16
+ &.space_strip
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "reader_builder/array"
4
+ require_relative "reader_builder/hash"
5
+ require_relative "reader_builder/text"
6
+ require_relative "reader_builder/value"
7
+
8
+ module Paginae
9
+ module ReaderBuilder
10
+ include Memoizer
11
+ include Array
12
+ include Hash
13
+ include Text
14
+ include Value
15
+
16
+ private
17
+
18
+ def __define_reader(name, mapped: nil, listed: nil, value: nil, **_kwargs)
19
+ if mapped
20
+ __define_hash_reader(name, mapped)
21
+ elsif listed
22
+ __define_list_reader(name, listed)
23
+ elsif value
24
+ __define_value_reader(name, value)
25
+ else
26
+ __define_text_reader(name)
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Paginae
4
+ module SpaceStriper
5
+ refine NilClass do
6
+ def space_strip
7
+ self
8
+ end
9
+ end
10
+
11
+ refine String do
12
+ def space_strip
13
+ gsub(/\s+/, " ").strip
14
+ end
15
+ end
16
+ end
17
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Paginae
4
- VERSION = "0.1.4"
4
+ VERSION = "0.1.5"
5
5
  end
data/lib/paginae.rb CHANGED
@@ -2,8 +2,12 @@
2
2
 
3
3
  require "nokogiri"
4
4
  require_relative "paginae/version"
5
+ require_relative "paginae/space_striper"
6
+ require_relative "paginae/memoizer"
5
7
  require_relative "paginae/html_initializer_injector"
6
8
  require_relative "paginae/attribute_injector"
9
+ require_relative "paginae/node_builder"
10
+ require_relative "paginae/reader_builder"
7
11
  require_relative "paginae/web"
8
12
 
9
13
  module Paginae
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: paginae
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis Ezcurdia
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-05-26 00:00:00.000000000 Z
11
+ date: 2022-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -32,7 +32,6 @@ extensions: []
32
32
  extra_rdoc_files: []
33
33
  files:
34
34
  - ".rubocop.yml"
35
- - ".rubocop_todo.yml"
36
35
  - CHANGELOG.md
37
36
  - CODE_OF_CONDUCT.md
38
37
  - Gemfile
@@ -43,6 +42,14 @@ files:
43
42
  - lib/paginae.rb
44
43
  - lib/paginae/attribute_injector.rb
45
44
  - lib/paginae/html_initializer_injector.rb
45
+ - lib/paginae/memoizer.rb
46
+ - lib/paginae/node_builder.rb
47
+ - lib/paginae/reader_builder.rb
48
+ - lib/paginae/reader_builder/array.rb
49
+ - lib/paginae/reader_builder/hash.rb
50
+ - lib/paginae/reader_builder/text.rb
51
+ - lib/paginae/reader_builder/value.rb
52
+ - lib/paginae/space_striper.rb
46
53
  - lib/paginae/version.rb
47
54
  - lib/paginae/web.rb
48
55
  - sig/paginae.rbs
data/.rubocop_todo.yml DELETED
@@ -1,27 +0,0 @@
1
- # This configuration was generated by
2
- # `rubocop --auto-gen-config`
3
- # on 2022-04-29 18:20:44 UTC using RuboCop version 1.28.2.
4
- # The point is for the user to remove these configuration records
5
- # one by one as the offenses are removed from the code base.
6
- # Note that changes in the inspected code, or installation of new
7
- # versions of RuboCop, may require this file to be generated again.
8
-
9
- # Offense count: 1
10
- # Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
11
- Metrics/AbcSize:
12
- Exclude:
13
- - lib/paginae/attribute_injector.rb
14
-
15
- # Offense count: 1
16
- # Configuration parameters: IgnoredMethods.
17
- Metrics/CyclomaticComplexity:
18
- Max: 27
19
-
20
- # Offense count: 1
21
- # Configuration parameters: IgnoredMethods.
22
- Metrics/PerceivedComplexity:
23
- Max: 27
24
-
25
- Metrics/MethodLength:
26
- Exclude:
27
- - lib/paginae/attribute_injector.rb