nokogiri-html-ext 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -5
- data/lib/nokogiri/html_ext/document.rb +28 -10
- data/lib/nokogiri/html_ext/version.rb +1 -1
- data/nokogiri-html-ext.gemspec +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 746b9d0d7073149ccc6a40216589a3a1ee5eee93a6c4913c8adefdfcebcd1aa6
|
4
|
+
data.tar.gz: 1319d933924bd467b4130fdadc0207babd2d748278ca0d35a21a37575310c450
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ad763192112beed01a81baa4826b434e6ef942b3b04e5c7629af130c8a6e3a5225fde4dd2deeb5aab505d4b0c5c228c31170e6b5843c843d50031f2e5b361d2a
|
7
|
+
data.tar.gz: f7e77bfae05a60f2a0f7bab509a9879befc6690e4f0cad879b2ac116faa608646ebe5118ae59fd9a6a3cdb71e32e02ae5186a6ca7b3eca9fda0e66085275d92e
|
data/CHANGELOG.md
CHANGED
@@ -1,24 +1,33 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
-
##
|
3
|
+
## 0.4.0 / unreleased
|
4
|
+
|
5
|
+
- Improve (hopefully) handling of non-ASCII input (6d1fc4d)
|
6
|
+
- Update Nokogiri version constraint to >= 1.14 (4b7ed74)
|
7
|
+
|
8
|
+
## 0.3.1 / 2023-01-19
|
9
|
+
|
10
|
+
- Revert removal of escaping/unescaping code in relative URL resolution (a78e83a)
|
11
|
+
|
12
|
+
## 0.3.0 / 2023-01-19
|
4
13
|
|
5
14
|
- Remove escaping/unescaping code in relative URL resolution (2de6c5b)
|
6
15
|
- Remove code-scanning-rubocop and rspec-github gems (3b3e625)
|
7
16
|
- Update development Ruby to v2.7.7 (bd328f5)
|
8
17
|
|
9
|
-
##
|
18
|
+
## 0.2.2 / 2022-08-20
|
10
19
|
|
11
20
|
- Improve handling of escaped and invalid URLs (b0d6c75)
|
12
21
|
|
13
|
-
##
|
22
|
+
## 0.2.1 / 2022-08-20
|
14
23
|
|
15
24
|
- Handle escaped URLs and invalid URLs (af78837)
|
16
25
|
- Use ruby/debug gem instead of pry-byebug (4476b9d)
|
17
26
|
|
18
|
-
##
|
27
|
+
## 0.2.0 / 2022-07-02
|
19
28
|
|
20
29
|
- Make `resolve_relative_url` method public (d132dd3)
|
21
30
|
|
22
|
-
##
|
31
|
+
## 0.1.0 / 2022-07-01
|
23
32
|
|
24
33
|
- Initial release! 🎉
|
@@ -5,7 +5,7 @@ require 'nokogiri'
|
|
5
5
|
module Nokogiri
|
6
6
|
module HTML4
|
7
7
|
class Document < Nokogiri::XML::Document
|
8
|
-
# A map of HTML
|
8
|
+
# A map of HTML +srcset+ attributes and their associated element names.
|
9
9
|
#
|
10
10
|
# @see https://html.spec.whatwg.org/#srcset-attributes
|
11
11
|
# @see https://html.spec.whatwg.org/#attributes-3
|
@@ -32,18 +32,18 @@ module Nokogiri
|
|
32
32
|
|
33
33
|
private_constant :URL_ATTRIBUTES_MAP
|
34
34
|
|
35
|
-
# Get the
|
35
|
+
# Get the +<base>+ element's HREF attribute value.
|
36
36
|
#
|
37
37
|
# @return [String, nil]
|
38
38
|
def base_href
|
39
39
|
(base = at_xpath('//base[@href]')) && base['href'].strip
|
40
40
|
end
|
41
41
|
|
42
|
-
# Set the
|
42
|
+
# Set the +<base>+ element's HREF attribute value.
|
43
43
|
#
|
44
|
-
# If a
|
45
|
-
# the given value. If no
|
46
|
-
# one and append it to the document's
|
44
|
+
# If a +<base>+ element exists, its HREF attribute value is replaced with
|
45
|
+
# the given value. If no +<base>+ element exists, this method will create
|
46
|
+
# one and append it to the document's +<head>+ (creating that element if
|
47
47
|
# necessary).
|
48
48
|
#
|
49
49
|
# @param url [String, #to_s]
|
@@ -69,11 +69,18 @@ module Nokogiri
|
|
69
69
|
#
|
70
70
|
# @return [String]
|
71
71
|
def resolve_relative_url(url)
|
72
|
-
|
73
|
-
|
74
|
-
|
72
|
+
url_str = url.to_s
|
73
|
+
|
74
|
+
# Escape each component before joining (Ruby's +URI.parse+ only likes
|
75
|
+
# ASCII) and subsequently unescape the result.
|
76
|
+
uri_parser.unescape(
|
77
|
+
uri_parser
|
78
|
+
.join(*[doc_url_str, base_href, url_str].compact.map { |u| uri_parser.escape(u) })
|
79
|
+
.normalize
|
80
|
+
.to_s
|
81
|
+
)
|
75
82
|
rescue URI::InvalidComponentError, URI::InvalidURIError
|
76
|
-
|
83
|
+
url_str
|
77
84
|
end
|
78
85
|
|
79
86
|
# Convert the document's relative URLs to absolute URLs.
|
@@ -97,6 +104,13 @@ module Nokogiri
|
|
97
104
|
|
98
105
|
private
|
99
106
|
|
107
|
+
# +Nokogiri::HTML4::Document#url+ may be double-escaped if the parser
|
108
|
+
# detects non-ASCII characters. For example, +https://[skull emoji].example+
|
109
|
+
# is returned as +"https%3A//%25E2%2598%25A0%25EF%25B8%258F.example"+.
|
110
|
+
def doc_url_str
|
111
|
+
@doc_url_str ||= uri_parser.unescape(uri_parser.unescape(document.url)).strip
|
112
|
+
end
|
113
|
+
|
100
114
|
def resolve_relative_urls_for(attributes_map)
|
101
115
|
attributes_map.each do |attribute, names|
|
102
116
|
xpaths = names.map { |name| "//#{name}[@#{attribute}]" }
|
@@ -106,6 +120,10 @@ module Nokogiri
|
|
106
120
|
end
|
107
121
|
end
|
108
122
|
end
|
123
|
+
|
124
|
+
def uri_parser
|
125
|
+
@uri_parser ||= URI::DEFAULT_PARSER
|
126
|
+
end
|
109
127
|
end
|
110
128
|
end
|
111
129
|
end
|
data/nokogiri-html-ext.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri-html-ext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Garber
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.14'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.14'
|
27
27
|
description: Extend Nokogiri with several useful HTML-centric features.
|
28
28
|
email:
|
29
29
|
- jason@sixtwothree.org
|
@@ -44,7 +44,7 @@ licenses:
|
|
44
44
|
- MIT
|
45
45
|
metadata:
|
46
46
|
bug_tracker_uri: https://github.com/jgarber623/nokogiri-html-ext/issues
|
47
|
-
changelog_uri: https://github.com/jgarber623/nokogiri-html-ext/blob/v0.
|
47
|
+
changelog_uri: https://github.com/jgarber623/nokogiri-html-ext/blob/v0.4.0/CHANGELOG.md
|
48
48
|
rubygems_mfa_required: 'true'
|
49
49
|
post_install_message:
|
50
50
|
rdoc_options: []
|