nokogiri-html-ext 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 67354c8dbf65774b9c418bc69722ba7f97c9f852913784e9227c0bc9f9a5dff7
4
- data.tar.gz: f4ba45c896400d9dc5a8e5b84365f8b4238b456b01a4311ef962ff8353db6c8d
3
+ metadata.gz: 746b9d0d7073149ccc6a40216589a3a1ee5eee93a6c4913c8adefdfcebcd1aa6
4
+ data.tar.gz: 1319d933924bd467b4130fdadc0207babd2d748278ca0d35a21a37575310c450
5
5
  SHA512:
6
- metadata.gz: d7b8edd22d5545c56e2564e255d87ac894691541efdb6e15e284b9194ea5078ac5b3a6b0cb477900491562884c56fe6246b371c8ad8def23c6a6bea92365bc32
7
- data.tar.gz: 6a5a6d1e19c1e1380012152b59a9a0baf68512c36a880b3810713edb98ea6830ef006ca67f03a0e15e98b813b8e23baed72d25c2a5b2b98d0d6c58460ca4e29c
6
+ metadata.gz: ad763192112beed01a81baa4826b434e6ef942b3b04e5c7629af130c8a6e3a5225fde4dd2deeb5aab505d4b0c5c228c31170e6b5843c843d50031f2e5b361d2a
7
+ data.tar.gz: f7e77bfae05a60f2a0f7bab509a9879befc6690e4f0cad879b2ac116faa608646ebe5118ae59fd9a6a3cdb71e32e02ae5186a6ca7b3eca9fda0e66085275d92e
data/CHANGELOG.md CHANGED
@@ -1,24 +1,33 @@
1
1
  # Changelog
2
2
 
3
- ## v0.3.0 / 2023-01-19
3
+ ## 0.4.0 / unreleased
4
+
5
+ - Improve (hopefully) handling of non-ASCII input (6d1fc4d)
6
+ - Update Nokogiri version constraint to >= 1.14 (4b7ed74)
7
+
8
+ ## 0.3.1 / 2023-01-19
9
+
10
+ - Revert removal of escaping/unescaping code in relative URL resolution (a78e83a)
11
+
12
+ ## 0.3.0 / 2023-01-19
4
13
 
5
14
  - Remove escaping/unescaping code in relative URL resolution (2de6c5b)
6
15
  - Remove code-scanning-rubocop and rspec-github gems (3b3e625)
7
16
  - Update development Ruby to v2.7.7 (bd328f5)
8
17
 
9
- ## v0.2.2 / 2022-08-20
18
+ ## 0.2.2 / 2022-08-20
10
19
 
11
20
  - Improve handling of escaped and invalid URLs (b0d6c75)
12
21
 
13
- ## v0.2.1 / 2022-08-20
22
+ ## 0.2.1 / 2022-08-20
14
23
 
15
24
  - Handle escaped URLs and invalid URLs (af78837)
16
25
  - Use ruby/debug gem instead of pry-byebug (4476b9d)
17
26
 
18
- ## v0.2.0 / 2022-07-02
27
+ ## 0.2.0 / 2022-07-02
19
28
 
20
29
  - Make `resolve_relative_url` method public (d132dd3)
21
30
 
22
- ## v0.1.0 / 2022-07-01
31
+ ## 0.1.0 / 2022-07-01
23
32
 
24
33
  - Initial release! 🎉
@@ -5,7 +5,7 @@ require 'nokogiri'
5
5
  module Nokogiri
6
6
  module HTML4
7
7
  class Document < Nokogiri::XML::Document
8
- # A map of HTML `srcset` attributes and their associated element names.
8
+ # A map of HTML +srcset+ attributes and their associated element names.
9
9
  #
10
10
  # @see https://html.spec.whatwg.org/#srcset-attributes
11
11
  # @see https://html.spec.whatwg.org/#attributes-3
@@ -32,18 +32,18 @@ module Nokogiri
32
32
 
33
33
  private_constant :URL_ATTRIBUTES_MAP
34
34
 
35
- # Get the <base> element's HREF attribute value.
35
+ # Get the +<base>+ element's HREF attribute value.
36
36
  #
37
37
  # @return [String, nil]
38
38
  def base_href
39
39
  (base = at_xpath('//base[@href]')) && base['href'].strip
40
40
  end
41
41
 
42
- # Set the <base> element's HREF attribute value.
42
+ # Set the +<base>+ element's HREF attribute value.
43
43
  #
44
- # If a <base> element exists, its HREF attribute value is replaced with
45
- # the given value. If no <base> element exists, this method will create
46
- # one and append it to the document's <head> (creating that element if
44
+ # If a +<base>+ element exists, its HREF attribute value is replaced with
45
+ # the given value. If no +<base>+ element exists, this method will create
46
+ # one and append it to the document's +<head>+ (creating that element if
47
47
  # necessary).
48
48
  #
49
49
  # @param url [String, #to_s]
@@ -69,11 +69,18 @@ module Nokogiri
69
69
  #
70
70
  # @return [String]
71
71
  def resolve_relative_url(url)
72
- URI::DEFAULT_PARSER.join(*[document.url.strip, base_href, url.to_s].compact)
73
- .normalize
74
- .to_s
72
+ url_str = url.to_s
73
+
74
+ # Escape each component before joining (Ruby's +URI.parse+ only likes
75
+ # ASCII) and subsequently unescape the joined result.
76
+ uri_parser.unescape(
77
+ uri_parser
78
+ .join(*[doc_url_str, base_href, url_str].compact.map { |u| uri_parser.escape(u) })
79
+ .normalize
80
+ .to_s
81
+ )
75
82
  rescue URI::InvalidComponentError, URI::InvalidURIError
76
- url
83
+ url_str
77
84
  end
78
85
 
79
86
  # Convert the document's relative URLs to absolute URLs.
@@ -97,6 +104,13 @@ module Nokogiri
97
104
 
98
105
  private
99
106
 
107
+ # +Nokogiri::HTML4::Document#url+ may be double-escaped if the parser
108
+ # detects non-ASCII characters. For example, +https://[skull emoji].example+
109
+ # is returned as +https%3A//%25E2%2598%25A0%25EF%25B8%258F.example+.
110
+ def doc_url_str
111
+ @doc_url_str ||= uri_parser.unescape(uri_parser.unescape(document.url)).strip
112
+ end
113
+
100
114
  def resolve_relative_urls_for(attributes_map)
101
115
  attributes_map.each do |attribute, names|
102
116
  xpaths = names.map { |name| "//#{name}[@#{attribute}]" }
@@ -106,6 +120,10 @@ module Nokogiri
106
120
  end
107
121
  end
108
122
  end
123
+
124
+ def uri_parser
125
+ @uri_parser ||= URI::DEFAULT_PARSER
126
+ end
109
127
  end
110
128
  end
111
129
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Nokogiri
4
4
  module HTMLExt
5
- VERSION = '0.3.0'
5
+ VERSION = '0.4.0'
6
6
  end
7
7
  end
@@ -27,5 +27,5 @@ Gem::Specification.new do |spec|
27
27
  'rubygems_mfa_required' => 'true'
28
28
  }
29
29
 
30
- spec.add_runtime_dependency 'nokogiri', '>= 1.13'
30
+ spec.add_runtime_dependency 'nokogiri', '>= 1.14'
31
31
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogiri-html-ext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jason Garber
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-19 00:00:00.000000000 Z
11
+ date: 2023-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '1.13'
19
+ version: '1.14'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '1.13'
26
+ version: '1.14'
27
27
  description: Extend Nokogiri with several useful HTML-centric features.
28
28
  email:
29
29
  - jason@sixtwothree.org
@@ -44,7 +44,7 @@ licenses:
44
44
  - MIT
45
45
  metadata:
46
46
  bug_tracker_uri: https://github.com/jgarber623/nokogiri-html-ext/issues
47
- changelog_uri: https://github.com/jgarber623/nokogiri-html-ext/blob/v0.3.0/CHANGELOG.md
47
+ changelog_uri: https://github.com/jgarber623/nokogiri-html-ext/blob/v0.4.0/CHANGELOG.md
48
48
  rubygems_mfa_required: 'true'
49
49
  post_install_message:
50
50
  rdoc_options: []