nokogiri-html-ext 1.4.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/nokogiri/html_ext/document.rb +73 -25
- data/nokogiri-html-ext.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 307b06335c0935cec4304b10c13ef113c514646d5a746f703ccb1567a39120ce
|
4
|
+
data.tar.gz: 26894c4ef4b3a018f2b776f4929035e590a8c9514af529fce581c23fb29e4cf0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66057c43dd9cc165dcd12603f8546119f39292305b3d943f5930c138686acbec53d2dfd744565343fe1f2351a8aa9a46b84a6d7d1ba7cafe7f10dd17be19548e
|
7
|
+
data.tar.gz: 52e9543d90ed8dc1790458f660a02cdb7c4574cb8c3fc7a093b7780cbe4627a8badc6ef84d51aae7acdc7ad5aa2e88cdec38e5eb597acffd6f8e8652950b62f5
|
data/lib/nokogiri/html_ext/document.rb
CHANGED
@@ -3,6 +3,9 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module HTML4
|
5
5
|
class Document < Nokogiri::XML::Document
|
6
|
+
# @see https://html.spec.whatwg.org/#image-candidate-string
|
7
|
+
IMAGE_CANDIDATE_STRING_PATTERN = /^(?<url>.+?)(?<descriptor>\s+.+)?$/.freeze
|
8
|
+
|
6
9
|
# A map of HTML +srcset+ attributes and their associated element names.
|
7
10
|
#
|
8
11
|
# @see https://html.spec.whatwg.org/#srcset-attributes
|
@@ -56,37 +59,62 @@ module Nokogiri
|
|
56
59
|
base["href"] = url
|
57
60
|
end
|
58
61
|
|
59
|
-
#
|
62
|
+
# Conditionally normalize a URL's empty path component.
|
60
63
|
#
|
61
|
-
# @param
|
64
|
+
# @param [String]
|
62
65
|
#
|
63
66
|
# @return [String]
|
64
|
-
def
|
65
|
-
|
66
|
-
|
67
|
-
uri = Addressable::URI.join(*strs)
|
67
|
+
def normalize_empty_path(url)
|
68
|
+
uri = Addressable::URI.parse(url.strip)
|
68
69
|
|
69
|
-
|
70
|
-
#
|
71
|
-
# @see https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.3
|
72
|
-
# RFC 3986 Uniform Resource Identifier (URI): Generic Syntax § 6.2.3. Scheme-Based Normalization
|
73
|
-
uri.path = uri.normalized_path if uri.path.empty?
|
70
|
+
uri.path = uri.normalized_path if uri.path.empty? && uri.path != uri.normalized_path
|
74
71
|
|
75
72
|
uri.to_s
|
76
73
|
rescue Addressable::URI::InvalidURIError
|
77
74
|
url
|
78
75
|
end
|
79
76
|
|
80
|
-
#
|
77
|
+
# Normalize the document's URLs whose path components are empty.
|
81
78
|
#
|
82
79
|
# @return [self]
|
83
|
-
def
|
80
|
+
def normalize_empty_paths!
|
84
81
|
xpath(*ATTRIBUTES_XPATHS).each do |attr_node|
|
85
82
|
attr_node.value =
|
86
83
|
if SRCSET_ATTRIBUTES_MAP.key?(attr_node.name)
|
87
|
-
|
84
|
+
normalize_srcset_attribute(attr_node.value)
|
88
85
|
else
|
89
|
-
|
86
|
+
normalize_empty_path(attr_node.value)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
self
|
91
|
+
end
|
92
|
+
|
93
|
+
# Resolve a URL based on the current document.
|
94
|
+
#
|
95
|
+
# @param url [String]
|
96
|
+
#
|
97
|
+
# @return [String]
|
98
|
+
def resolve_url(url)
|
99
|
+
uris = [document.url, base_href, url].filter_map { |uri| Addressable::URI.parse(uri&.strip) }
|
100
|
+
|
101
|
+
return uris.last.to_s if uris.last.absolute?
|
102
|
+
|
103
|
+
Addressable::URI.join(*uris).to_s
|
104
|
+
rescue Addressable::URI::InvalidURIError
|
105
|
+
url
|
106
|
+
end
|
107
|
+
|
108
|
+
# Resolve the document's URLs
|
109
|
+
#
|
110
|
+
# @return [self]
|
111
|
+
def resolve_urls!
|
112
|
+
xpath(*ATTRIBUTES_XPATHS).each do |attr_node|
|
113
|
+
attr_node.value =
|
114
|
+
if SRCSET_ATTRIBUTES_MAP.key?(attr_node.name)
|
115
|
+
resolve_srcset_attribute(attr_node.value)
|
116
|
+
else
|
117
|
+
resolve_url(attr_node.value)
|
90
118
|
end
|
91
119
|
end
|
92
120
|
|
@@ -95,20 +123,40 @@ module Nokogiri
|
|
95
123
|
|
96
124
|
private
|
97
125
|
|
98
|
-
#
|
99
|
-
# candidate strings.
|
126
|
+
# Normalize the URLs in a +srcset+ attribute's image candidate strings.
|
100
127
|
#
|
101
|
-
# @param
|
128
|
+
# @param value [String]
|
102
129
|
#
|
103
130
|
# @return [String]
|
104
|
-
def
|
105
|
-
|
106
|
-
#
|
107
|
-
|
108
|
-
|
109
|
-
end
|
131
|
+
def normalize_srcset_attribute(value)
|
132
|
+
parse_image_candidate_strings(value)
|
133
|
+
.map! { |captures| "#{normalize_empty_path(captures["url"])}#{captures["descriptor"]}" }
|
134
|
+
.join(", ")
|
135
|
+
end
|
110
136
|
|
111
|
-
|
137
|
+
# Parse a +srcset+ attribute's value into a +Hash+ of image candidate
|
138
|
+
# strings.
|
139
|
+
#
|
140
|
+
# @see https://html.spec.whatwg.org/#image-candidate-string
|
141
|
+
#
|
142
|
+
# @param [String]
|
143
|
+
#
|
144
|
+
# @return [Array<Hash{String => String, nil}>]
|
145
|
+
def parse_image_candidate_strings(value)
|
146
|
+
value
|
147
|
+
.split(/\s*,\s*/)
|
148
|
+
.map! { |candidate_string| candidate_string.match(IMAGE_CANDIDATE_STRING_PATTERN).named_captures }
|
149
|
+
end
|
150
|
+
|
151
|
+
# Resolve the URLs in a +srcset+ attribute's image candidate strings.
|
152
|
+
#
|
153
|
+
# @param value [String]
|
154
|
+
#
|
155
|
+
# @return [String]
|
156
|
+
def resolve_srcset_attribute(value)
|
157
|
+
parse_image_candidate_strings(value)
|
158
|
+
.map! { |captures| "#{resolve_url(captures["url"])}#{captures["descriptor"]}" }
|
159
|
+
.join(", ")
|
112
160
|
end
|
113
161
|
end
|
114
162
|
end
|
data/nokogiri-html-ext.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri-html-ext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.1
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Garber
|
@@ -55,11 +55,11 @@ licenses:
|
|
55
55
|
- MIT
|
56
56
|
metadata:
|
57
57
|
bug_tracker_uri: https://codeberg.org/jgarber/nokogiri-html-ext/issues
|
58
|
-
changelog_uri: https://codeberg.org/jgarber/nokogiri-html-ext/releases/tag/v1.4.1
|
59
|
-
documentation_uri: https://rubydoc.info/gems/nokogiri-html-ext/1.4.1
|
58
|
+
changelog_uri: https://codeberg.org/jgarber/nokogiri-html-ext/releases/tag/v1.6.0
|
59
|
+
documentation_uri: https://rubydoc.info/gems/nokogiri-html-ext/1.6.0
|
60
60
|
homepage_uri: https://codeberg.org/jgarber/nokogiri-html-ext
|
61
61
|
rubygems_mfa_required: 'true'
|
62
|
-
source_code_uri: https://codeberg.org/jgarber/nokogiri-html-ext/src/tag/v1.4.1
|
62
|
+
source_code_uri: https://codeberg.org/jgarber/nokogiri-html-ext/src/tag/v1.6.0
|
63
63
|
rdoc_options: []
|
64
64
|
require_paths:
|
65
65
|
- lib
|