nokogiri-html-ext 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3c5f1ccd4ab43e822b1bc075f4b84c212b9ab539e07b206d2b65499a775ed539
4
- data.tar.gz: 15ef0baaa308692662f39e238bedba2029e336b53d753190182b19b770e0ea6b
3
+ metadata.gz: 307b06335c0935cec4304b10c13ef113c514646d5a746f703ccb1567a39120ce
4
+ data.tar.gz: 26894c4ef4b3a018f2b776f4929035e590a8c9514af529fce581c23fb29e4cf0
5
5
  SHA512:
6
- metadata.gz: 2ba7cba037735ea0323abae2828d0c8abd576886b97fef12b8319add735dedbe77ada5433f47e4f42a240855ff196dd178925a61bcbd99e7be834bf500423cd7
7
- data.tar.gz: 53ddcdcc16b93fdf00947acaec6764d56f95665cac5bc96868a2a20a627b917907c1fa7182639d89d0359e5c0eed6ca4932cb112e2944de9403fca83d888a5e2
6
+ metadata.gz: 66057c43dd9cc165dcd12603f8546119f39292305b3d943f5930c138686acbec53d2dfd744565343fe1f2351a8aa9a46b84a6d7d1ba7cafe7f10dd17be19548e
7
+ data.tar.gz: 52e9543d90ed8dc1790458f660a02cdb7c4574cb8c3fc7a093b7780cbe4627a8badc6ef84d51aae7acdc7ad5aa2e88cdec38e5eb597acffd6f8e8652950b62f5
@@ -3,6 +3,9 @@
3
3
  module Nokogiri
4
4
  module HTML4
5
5
  class Document < Nokogiri::XML::Document
6
+ # @see https://html.spec.whatwg.org/#image-candidate-string
7
+ IMAGE_CANDIDATE_STRING_PATTERN = /^(?<url>.+?)(?<descriptor>\s+.+)?$/.freeze
8
+
6
9
  # A map of HTML +srcset+ attributes and their associated element names.
7
10
  #
8
11
  # @see https://html.spec.whatwg.org/#srcset-attributes
@@ -56,37 +59,62 @@ module Nokogiri
56
59
  base["href"] = url
57
60
  end
58
61
 
59
- # Convert a relative URL to an absolute URL based on the current document.
62
+ # Conditionally normalize a URL's empty path component.
60
63
  #
61
- # @param url [String]
64
+ # @param url [String]
62
65
  #
63
66
  # @return [String]
64
- def resolve_relative_url(url)
65
- strs = [document.url, base_href, url].filter_map { |str| str&.strip }
66
-
67
- uri = Addressable::URI.join(*strs)
67
+ def normalize_empty_path(url)
68
+ uri = Addressable::URI.parse(url.strip)
68
69
 
69
- # Normalize an empty path component.
70
- #
71
- # @see https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.3
72
- # RFC 3986 Uniform Resource Identifier (URI): Generic Syntax § 6.2.3. Scheme-Based Normalization
73
- uri.path = uri.normalized_path if uri.path.empty?
70
+ uri.path = uri.normalized_path if uri.path.empty? && uri.path != uri.normalized_path
74
71
 
75
72
  uri.to_s
76
73
  rescue Addressable::URI::InvalidURIError
77
74
  url
78
75
  end
79
76
 
80
- # Convert the document's relative URLs to absolute URLs.
77
+ # Normalize the document's URLs whose path components are empty.
81
78
  #
82
79
  # @return [self]
83
- def resolve_relative_urls!
80
+ def normalize_empty_paths!
84
81
  xpath(*ATTRIBUTES_XPATHS).each do |attr_node|
85
82
  attr_node.value =
86
83
  if SRCSET_ATTRIBUTES_MAP.key?(attr_node.name)
87
- resolve_srcset_attributes(attr_node.value.split(/\s*,\s*/))
84
+ normalize_srcset_attribute(attr_node.value)
88
85
  else
89
- resolve_relative_url(attr_node.value)
86
+ normalize_empty_path(attr_node.value)
87
+ end
88
+ end
89
+
90
+ self
91
+ end
92
+
93
+ # Resolve a URL based on the current document.
94
+ #
95
+ # @param url [String]
96
+ #
97
+ # @return [String]
98
+ def resolve_url(url)
99
+ uris = [document.url, base_href, url].filter_map { |uri| Addressable::URI.parse(uri&.strip) }
100
+
101
+ return uris.last.to_s if uris.last.absolute?
102
+
103
+ Addressable::URI.join(*uris).to_s
104
+ rescue Addressable::URI::InvalidURIError
105
+ url
106
+ end
107
+
108
+ # Resolve the document's URLs.
109
+ #
110
+ # @return [self]
111
+ def resolve_urls!
112
+ xpath(*ATTRIBUTES_XPATHS).each do |attr_node|
113
+ attr_node.value =
114
+ if SRCSET_ATTRIBUTES_MAP.key?(attr_node.name)
115
+ resolve_srcset_attribute(attr_node.value)
116
+ else
117
+ resolve_url(attr_node.value)
90
118
  end
91
119
  end
92
120
 
@@ -95,20 +123,40 @@ module Nokogiri
95
123
 
96
124
  private
97
125
 
98
- # Resolve a set of +String+s that represent +srcset+ attribute image
99
- # candidate strings.
126
+ # Normalize the URLs in a +srcset+ attribute's image candidate strings.
100
127
  #
101
- # @param srcset_attributes [Array<String>]
128
+ # @param value [String]
102
129
  #
103
130
  # @return [String]
104
- def resolve_srcset_attributes(srcset_attributes)
105
- srcset_attributes.map! do |candidate_string|
106
- # rubocop:disable Style/PerlBackrefs
107
- candidate_string.sub(/^(.+?)(\s+.+)?$/) { "#{resolve_relative_url($1)}#{$2}" }
108
- # rubocop:enable Style/PerlBackrefs
109
- end
131
+ def normalize_srcset_attribute(value)
132
+ parse_image_candidate_strings(value)
133
+ .map! { |captures| "#{normalize_empty_path(captures["url"])}#{captures["descriptor"]}" }
134
+ .join(", ")
135
+ end
110
136
 
111
- srcset_attributes.join(", ")
137
+ # Parse a +srcset+ attribute's value into an +Array+ of +Hash+es, one per
138
+ # image candidate string.
139
+ #
140
+ # @see https://html.spec.whatwg.org/#image-candidate-string
141
+ #
142
+ # @param value [String]
143
+ #
144
+ # @return [Array<Hash{String => String, nil}>]
145
+ def parse_image_candidate_strings(value)
146
+ value
147
+ .split(/\s*,\s*/)
148
+ .map! { |candidate_string| candidate_string.match(IMAGE_CANDIDATE_STRING_PATTERN).named_captures }
149
+ end
150
+
151
+ # Resolve the URLs in a +srcset+ attribute's image candidate strings.
152
+ #
153
+ # @param value [String]
154
+ #
155
+ # @return [String]
156
+ def resolve_srcset_attribute(value)
157
+ parse_image_candidate_strings(value)
158
+ .map! { |captures| "#{resolve_url(captures["url"])}#{captures["descriptor"]}" }
159
+ .join(", ")
112
160
  end
113
161
  end
114
162
  end
@@ -4,7 +4,7 @@ Gem::Specification.new do |spec|
4
4
  spec.required_ruby_version = ">= 2.7"
5
5
 
6
6
  spec.name = "nokogiri-html-ext"
7
- spec.version = "1.4.1"
7
+ spec.version = "1.6.0"
8
8
  spec.authors = ["Jason Garber"]
9
9
  spec.email = ["jason@sixtwothree.org"]
10
10
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogiri-html-ext
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jason Garber
@@ -55,11 +55,11 @@ licenses:
55
55
  - MIT
56
56
  metadata:
57
57
  bug_tracker_uri: https://codeberg.org/jgarber/nokogiri-html-ext/issues
58
- changelog_uri: https://codeberg.org/jgarber/nokogiri-html-ext/releases/tag/v1.4.1
59
- documentation_uri: https://rubydoc.info/gems/nokogiri-html-ext/1.4.1
58
+ changelog_uri: https://codeberg.org/jgarber/nokogiri-html-ext/releases/tag/v1.6.0
59
+ documentation_uri: https://rubydoc.info/gems/nokogiri-html-ext/1.6.0
60
60
  homepage_uri: https://codeberg.org/jgarber/nokogiri-html-ext
61
61
  rubygems_mfa_required: 'true'
62
- source_code_uri: https://codeberg.org/jgarber/nokogiri-html-ext/src/tag/v1.4.1
62
+ source_code_uri: https://codeberg.org/jgarber/nokogiri-html-ext/src/tag/v1.6.0
63
63
  rdoc_options: []
64
64
  require_paths:
65
65
  - lib