nokogiri-html-ext 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/nokogiri/html_ext/document.rb +20 -10
- data/lib/nokogiri/html_ext/version.rb +1 -1
- data/nokogiri-html-ext.gemspec +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 746b9d0d7073149ccc6a40216589a3a1ee5eee93a6c4913c8adefdfcebcd1aa6
|
4
|
+
data.tar.gz: 1319d933924bd467b4130fdadc0207babd2d748278ca0d35a21a37575310c450
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad763192112beed01a81baa4826b434e6ef942b3b04e5c7629af130c8a6e3a5225fde4dd2deeb5aab505d4b0c5c228c31170e6b5843c843d50031f2e5b361d2a
|
7
|
+
data.tar.gz: f7e77bfae05a60f2a0f7bab509a9879befc6690e4f0cad879b2ac116faa608646ebe5118ae59fd9a6a3cdb71e32e02ae5186a6ca7b3eca9fda0e66085275d92e
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 0.4.0 / unreleased
|
4
|
+
|
5
|
+
- Improve (hopefully) handling of non-ASCII input (6d1fc4d)
|
6
|
+
- Update Nokogiri version constraint to >= 1.14 (4b7ed74)
|
7
|
+
|
3
8
|
## 0.3.1 / 2023-01-19
|
4
9
|
|
5
10
|
- Revert removal of escaping/unescaping code in relative URL resolution (a78e83a)
|
data/lib/nokogiri/html_ext/document.rb
CHANGED
@@ -5,7 +5,7 @@ require 'nokogiri'
|
|
5
5
|
module Nokogiri
|
6
6
|
module HTML4
|
7
7
|
class Document < Nokogiri::XML::Document
|
8
|
-
# A map of HTML
|
8
|
+
# A map of HTML +srcset+ attributes and their associated element names.
|
9
9
|
#
|
10
10
|
# @see https://html.spec.whatwg.org/#srcset-attributes
|
11
11
|
# @see https://html.spec.whatwg.org/#attributes-3
|
@@ -32,18 +32,18 @@ module Nokogiri
|
|
32
32
|
|
33
33
|
private_constant :URL_ATTRIBUTES_MAP
|
34
34
|
|
35
|
-
# Get the
|
35
|
+
# Get the +<base>+ element's HREF attribute value.
|
36
36
|
#
|
37
37
|
# @return [String, nil]
|
38
38
|
def base_href
|
39
39
|
(base = at_xpath('//base[@href]')) && base['href'].strip
|
40
40
|
end
|
41
41
|
|
42
|
-
# Set the
|
42
|
+
# Set the +<base>+ element's HREF attribute value.
|
43
43
|
#
|
44
|
-
# If a
|
45
|
-
# the given value. If no
|
46
|
-
# one and append it to the document's
|
44
|
+
# If a +<base>+ element exists, its HREF attribute value is replaced with
|
45
|
+
# the given value. If no +<base>+ element exists, this method will create
|
46
|
+
# one and append it to the document's +<head>+ (creating that element if
|
47
47
|
# necessary).
|
48
48
|
#
|
49
49
|
# @param url [String, #to_s]
|
@@ -71,13 +71,16 @@ module Nokogiri
|
|
71
71
|
def resolve_relative_url(url)
|
72
72
|
url_str = url.to_s
|
73
73
|
|
74
|
+
# Escape each component before joining (Ruby's +URI.parse+ only likes
|
75
|
+
# ASCII) and subsequently unescaping.
|
74
76
|
uri_parser.unescape(
|
75
|
-
uri_parser
|
76
|
-
|
77
|
-
|
77
|
+
uri_parser
|
78
|
+
.join(*[doc_url_str, base_href, url_str].compact.map { |u| uri_parser.escape(u) })
|
79
|
+
.normalize
|
80
|
+
.to_s
|
78
81
|
)
|
79
82
|
rescue URI::InvalidComponentError, URI::InvalidURIError
|
80
|
-
|
83
|
+
url_str
|
81
84
|
end
|
82
85
|
|
83
86
|
# Convert the document's relative URLs to absolute URLs.
|
@@ -101,6 +104,13 @@ module Nokogiri
|
|
101
104
|
|
102
105
|
private
|
103
106
|
|
107
|
+
# +Nokogiri::HTML4::Document#url+ may be double-escaped if the parser
|
108
|
+
# detects non-ASCII characters. For example, +https://[skull emoji].example+
|
109
|
+
# is returned as +"https%3A//%25E2%2598%25A0%25EF%25B8%258F.example+.
|
110
|
+
def doc_url_str
|
111
|
+
@doc_url_str ||= uri_parser.unescape(uri_parser.unescape(document.url)).strip
|
112
|
+
end
|
113
|
+
|
104
114
|
def resolve_relative_urls_for(attributes_map)
|
105
115
|
attributes_map.each do |attribute, names|
|
106
116
|
xpaths = names.map { |name| "//#{name}[@#{attribute}]" }
|
data/nokogiri-html-ext.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri-html-ext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Garber
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.14'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.14'
|
27
27
|
description: Extend Nokogiri with several useful HTML-centric features.
|
28
28
|
email:
|
29
29
|
- jason@sixtwothree.org
|
@@ -44,7 +44,7 @@ licenses:
|
|
44
44
|
- MIT
|
45
45
|
metadata:
|
46
46
|
bug_tracker_uri: https://github.com/jgarber623/nokogiri-html-ext/issues
|
47
|
-
changelog_uri: https://github.com/jgarber623/nokogiri-html-ext/blob/v0.3.1/CHANGELOG.md
|
47
|
+
changelog_uri: https://github.com/jgarber623/nokogiri-html-ext/blob/v0.4.0/CHANGELOG.md
|
48
48
|
rubygems_mfa_required: 'true'
|
49
49
|
post_install_message:
|
50
50
|
rdoc_options: []
|