nokogiri-html-ext 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -5
- data/lib/nokogiri/html_ext/document.rb +28 -10
- data/lib/nokogiri/html_ext/version.rb +1 -1
- data/nokogiri-html-ext.gemspec +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 746b9d0d7073149ccc6a40216589a3a1ee5eee93a6c4913c8adefdfcebcd1aa6
|
4
|
+
data.tar.gz: 1319d933924bd467b4130fdadc0207babd2d748278ca0d35a21a37575310c450
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad763192112beed01a81baa4826b434e6ef942b3b04e5c7629af130c8a6e3a5225fde4dd2deeb5aab505d4b0c5c228c31170e6b5843c843d50031f2e5b361d2a
|
7
|
+
data.tar.gz: f7e77bfae05a60f2a0f7bab509a9879befc6690e4f0cad879b2ac116faa608646ebe5118ae59fd9a6a3cdb71e32e02ae5186a6ca7b3eca9fda0e66085275d92e
|
data/CHANGELOG.md
CHANGED
@@ -1,24 +1,33 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
-
##
|
3
|
+
## 0.4.0 / unreleased
|
4
|
+
|
5
|
+
- Improve (hopefully) handling of non-ASCII input (6d1fc4d)
|
6
|
+
- Update Nokogiri version constraint to >= 1.14 (4b7ed74)
|
7
|
+
|
8
|
+
## 0.3.1 / 2023-01-19
|
9
|
+
|
10
|
+
- Revert removal of escaping/unescaping code in relative URL resolution (a78e83a)
|
11
|
+
|
12
|
+
## 0.3.0 / 2023-01-19
|
4
13
|
|
5
14
|
- Remove escaping/unescaping code in relative URL resolution (2de6c5b)
|
6
15
|
- Remove code-scanning-rubocop and rspec-github gems (3b3e625)
|
7
16
|
- Update development Ruby to v2.7.7 (bd328f5)
|
8
17
|
|
9
|
-
##
|
18
|
+
## 0.2.2 / 2022-08-20
|
10
19
|
|
11
20
|
- Improve handling of escaped and invalid URLs (b0d6c75)
|
12
21
|
|
13
|
-
##
|
22
|
+
## 0.2.1 / 2022-08-20
|
14
23
|
|
15
24
|
- Handle escaped URLs and invalid URLs (af78837)
|
16
25
|
- Use ruby/debug gem instead of pry-byebug (4476b9d)
|
17
26
|
|
18
|
-
##
|
27
|
+
## 0.2.0 / 2022-07-02
|
19
28
|
|
20
29
|
- Make `resolve_relative_url` method public (d132dd3)
|
21
30
|
|
22
|
-
##
|
31
|
+
## 0.1.0 / 2022-07-01
|
23
32
|
|
24
33
|
- Initial release! 🎉
|
@@ -5,7 +5,7 @@ require 'nokogiri'
|
|
5
5
|
module Nokogiri
|
6
6
|
module HTML4
|
7
7
|
class Document < Nokogiri::XML::Document
|
8
|
-
# A map of HTML
|
8
|
+
# A map of HTML +srcset+ attributes and their associated element names.
|
9
9
|
#
|
10
10
|
# @see https://html.spec.whatwg.org/#srcset-attributes
|
11
11
|
# @see https://html.spec.whatwg.org/#attributes-3
|
@@ -32,18 +32,18 @@ module Nokogiri
|
|
32
32
|
|
33
33
|
private_constant :URL_ATTRIBUTES_MAP
|
34
34
|
|
35
|
-
# Get the
|
35
|
+
# Get the +<base>+ element's HREF attribute value.
|
36
36
|
#
|
37
37
|
# @return [String, nil]
|
38
38
|
def base_href
|
39
39
|
(base = at_xpath('//base[@href]')) && base['href'].strip
|
40
40
|
end
|
41
41
|
|
42
|
-
# Set the
|
42
|
+
# Set the +<base>+ element's HREF attribute value.
|
43
43
|
#
|
44
|
-
# If a
|
45
|
-
# the given value. If no
|
46
|
-
# one and append it to the document's
|
44
|
+
# If a +<base>+ element exists, its HREF attribute value is replaced with
|
45
|
+
# the given value. If no +<base>+ element exists, this method will create
|
46
|
+
# one and append it to the document's +<head>+ (creating that element if
|
47
47
|
# necessary).
|
48
48
|
#
|
49
49
|
# @param url [String, #to_s]
|
@@ -69,11 +69,18 @@ module Nokogiri
|
|
69
69
|
#
|
70
70
|
# @return [String]
|
71
71
|
def resolve_relative_url(url)
|
72
|
-
|
73
|
-
|
74
|
-
|
72
|
+
url_str = url.to_s
|
73
|
+
|
74
|
+
# Escape each component before joining (Ruby's +URI.parse+ only likes
|
75
|
+
# ASCII), then unescape the joined result.
|
76
|
+
uri_parser.unescape(
|
77
|
+
uri_parser
|
78
|
+
.join(*[doc_url_str, base_href, url_str].compact.map { |u| uri_parser.escape(u) })
|
79
|
+
.normalize
|
80
|
+
.to_s
|
81
|
+
)
|
75
82
|
rescue URI::InvalidComponentError, URI::InvalidURIError
|
76
|
-
|
83
|
+
url_str
|
77
84
|
end
|
78
85
|
|
79
86
|
# Convert the document's relative URLs to absolute URLs.
|
@@ -97,6 +104,13 @@ module Nokogiri
|
|
97
104
|
|
98
105
|
private
|
99
106
|
|
107
|
+
# +Nokogiri::HTML4::Document#url+ may be double-escaped if the parser
|
108
|
+
# detects non-ASCII characters. For example, +https://[skull emoji].example+
|
109
|
+
# is returned as +"https%3A//%25E2%2598%25A0%25EF%25B8%258F.example"+.
|
110
|
+
def doc_url_str
|
111
|
+
@doc_url_str ||= uri_parser.unescape(uri_parser.unescape(document.url)).strip
|
112
|
+
end
|
113
|
+
|
100
114
|
def resolve_relative_urls_for(attributes_map)
|
101
115
|
attributes_map.each do |attribute, names|
|
102
116
|
xpaths = names.map { |name| "//#{name}[@#{attribute}]" }
|
@@ -106,6 +120,10 @@ module Nokogiri
|
|
106
120
|
end
|
107
121
|
end
|
108
122
|
end
|
123
|
+
|
124
|
+
def uri_parser
|
125
|
+
@uri_parser ||= URI::DEFAULT_PARSER
|
126
|
+
end
|
109
127
|
end
|
110
128
|
end
|
111
129
|
end
|
data/nokogiri-html-ext.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri-html-ext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Garber
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.14'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.14'
|
27
27
|
description: Extend Nokogiri with several useful HTML-centric features.
|
28
28
|
email:
|
29
29
|
- jason@sixtwothree.org
|
@@ -44,7 +44,7 @@ licenses:
|
|
44
44
|
- MIT
|
45
45
|
metadata:
|
46
46
|
bug_tracker_uri: https://github.com/jgarber623/nokogiri-html-ext/issues
|
47
|
-
changelog_uri: https://github.com/jgarber623/nokogiri-html-ext/blob/v0.
|
47
|
+
changelog_uri: https://github.com/jgarber623/nokogiri-html-ext/blob/v0.4.0/CHANGELOG.md
|
48
48
|
rubygems_mfa_required: 'true'
|
49
49
|
post_install_message:
|
50
50
|
rdoc_options: []
|