nokogiri-html-ext 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fe48e23deb7150b7adf50eb5298dfef2dbc9a75898574ed21cd567038728d2ed
4
- data.tar.gz: 0f5f378ae1180ab2718b1927e220d092363a8cb013766e4ffaf483050c2f7020
3
+ metadata.gz: 307b06335c0935cec4304b10c13ef113c514646d5a746f703ccb1567a39120ce
4
+ data.tar.gz: 26894c4ef4b3a018f2b776f4929035e590a8c9514af529fce581c23fb29e4cf0
5
5
  SHA512:
6
- metadata.gz: beba250e78b23dfea02fe3cdbca224049f4a63f7f6ab990403eedaab6d29770f09aaa47fa2cadc18304237baa2b9b2d751a2940b6d34e98a9b3abd48f815e02e
7
- data.tar.gz: 2971a2ae956f0877aae05a70ea6283ccebb72c935667deda53d7337f4cde74b946950ce0a2f826a7a522c7538ddf84090603a683690baf080238709a705f0dd8
6
+ metadata.gz: 66057c43dd9cc165dcd12603f8546119f39292305b3d943f5930c138686acbec53d2dfd744565343fe1f2351a8aa9a46b84a6d7d1ba7cafe7f10dd17be19548e
7
+ data.tar.gz: 52e9543d90ed8dc1790458f660a02cdb7c4574cb8c3fc7a093b7780cbe4627a8badc6ef84d51aae7acdc7ad5aa2e88cdec38e5eb597acffd6f8e8652950b62f5
@@ -3,6 +3,9 @@
3
3
  module Nokogiri
4
4
  module HTML4
5
5
  class Document < Nokogiri::XML::Document
6
+ # @see https://html.spec.whatwg.org/#image-candidate-string
7
+ IMAGE_CANDIDATE_STRING_PATTERN = /^(?<url>.+?)(?<descriptor>\s+.+)?$/.freeze
8
+
6
9
  # A map of HTML +srcset+ attributes and their associated element names.
7
10
  #
8
11
  # @see https://html.spec.whatwg.org/#srcset-attributes
@@ -56,6 +59,37 @@ module Nokogiri
56
59
  base["href"] = url
57
60
  end
58
61
 
62
+ # Conditionally normalize a URL's empty path component.
63
+ #
64
+ # @param url [String]
65
+ #
66
+ # @return [String]
67
+ def normalize_empty_path(url)
68
+ uri = Addressable::URI.parse(url.strip)
69
+
70
+ uri.path = uri.normalized_path if uri.path.empty? && uri.path != uri.normalized_path
71
+
72
+ uri.to_s
73
+ rescue Addressable::URI::InvalidURIError
74
+ url
75
+ end
76
+
77
+ # Normalize the document's URLs whose path components are empty.
78
+ #
79
+ # @return [self]
80
+ def normalize_empty_paths!
81
+ xpath(*ATTRIBUTES_XPATHS).each do |attr_node|
82
+ attr_node.value =
83
+ if SRCSET_ATTRIBUTES_MAP.key?(attr_node.name)
84
+ normalize_srcset_attribute(attr_node.value)
85
+ else
86
+ normalize_empty_path(attr_node.value)
87
+ end
88
+ end
89
+
90
+ self
91
+ end
92
+
59
93
  # Resolve a URL based on the current document.
60
94
  #
61
95
  # @param url [String]
@@ -78,7 +112,7 @@ module Nokogiri
78
112
  xpath(*ATTRIBUTES_XPATHS).each do |attr_node|
79
113
  attr_node.value =
80
114
  if SRCSET_ATTRIBUTES_MAP.key?(attr_node.name)
81
- resolve_srcset_attributes(attr_node.value.split(/\s*,\s*/))
115
+ resolve_srcset_attribute(attr_node.value)
82
116
  else
83
117
  resolve_url(attr_node.value)
84
118
  end
@@ -89,20 +123,40 @@ module Nokogiri
89
123
 
90
124
  private
91
125
 
92
- # Resolve a set of +String+s that represent +srcset+ attribute image
93
- # candidate strings.
126
+ # Normalize the URLs in a +srcset+ attribute's image candidate strings.
94
127
  #
95
- # @param srcset_attributes [Array<String>]
128
+ # @param value [String]
96
129
  #
97
130
  # @return [String]
98
- def resolve_srcset_attributes(srcset_attributes)
99
- srcset_attributes.map! do |candidate_string|
100
- # rubocop:disable Style/PerlBackrefs
101
- candidate_string.sub(/^(.+?)(\s+.+)?$/) { "#{resolve_url($1)}#{$2}" }
102
- # rubocop:enable Style/PerlBackrefs
103
- end
131
+ def normalize_srcset_attribute(value)
132
+ parse_image_candidate_strings(value)
133
+ .map! { |captures| "#{normalize_empty_path(captures["url"])}#{captures["descriptor"]}" }
134
+ .join(", ")
135
+ end
104
136
 
105
- srcset_attributes.join(", ")
137
+ # Parse a +srcset+ attribute's value into an +Array+ of parsed image candidate
138
+ # strings.
139
+ #
140
+ # @see https://html.spec.whatwg.org/#image-candidate-string
141
+ #
142
+ # @param value [String]
143
+ #
144
+ # @return [Array<Hash{String => String, nil}>]
145
+ def parse_image_candidate_strings(value)
146
+ value
147
+ .split(/\s*,\s*/)
148
+ .map! { |candidate_string| candidate_string.match(IMAGE_CANDIDATE_STRING_PATTERN).named_captures }
149
+ end
150
+
151
+ # Resolve the URLs in a +srcset+ attribute's image candidate strings.
152
+ #
153
+ # @param value [String]
154
+ #
155
+ # @return [String]
156
+ def resolve_srcset_attribute(value)
157
+ parse_image_candidate_strings(value)
158
+ .map! { |captures| "#{resolve_url(captures["url"])}#{captures["descriptor"]}" }
159
+ .join(", ")
106
160
  end
107
161
  end
108
162
  end
@@ -4,7 +4,7 @@ Gem::Specification.new do |spec|
4
4
  spec.required_ruby_version = ">= 2.7"
5
5
 
6
6
  spec.name = "nokogiri-html-ext"
7
- spec.version = "1.5.0"
7
+ spec.version = "1.6.0"
8
8
  spec.authors = ["Jason Garber"]
9
9
  spec.email = ["jason@sixtwothree.org"]
10
10
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogiri-html-ext
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jason Garber
@@ -55,11 +55,11 @@ licenses:
55
55
  - MIT
56
56
  metadata:
57
57
  bug_tracker_uri: https://codeberg.org/jgarber/nokogiri-html-ext/issues
58
- changelog_uri: https://codeberg.org/jgarber/nokogiri-html-ext/releases/tag/v1.5.0
59
- documentation_uri: https://rubydoc.info/gems/nokogiri-html-ext/1.5.0
58
+ changelog_uri: https://codeberg.org/jgarber/nokogiri-html-ext/releases/tag/v1.6.0
59
+ documentation_uri: https://rubydoc.info/gems/nokogiri-html-ext/1.6.0
60
60
  homepage_uri: https://codeberg.org/jgarber/nokogiri-html-ext
61
61
  rubygems_mfa_required: 'true'
62
- source_code_uri: https://codeberg.org/jgarber/nokogiri-html-ext/src/tag/v1.5.0
62
+ source_code_uri: https://codeberg.org/jgarber/nokogiri-html-ext/src/tag/v1.6.0
63
63
  rdoc_options: []
64
64
  require_paths:
65
65
  - lib