nokogiri-html-ext 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -3
- data/lib/nokogiri/html-ext.rb +1 -1
- data/lib/nokogiri/html_ext/document.rb +8 -18
- data/nokogiri-html-ext.gemspec +2 -1
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81844b7536086078b24d1600282bb324c1325cc62b527b45bf81a13090ff413a
|
4
|
+
data.tar.gz: fdda5725873079052a6315c6d19a388dce12586c54ed88cb42321f215f2826fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 307a4c317c840578338d253d1c6c8586ab863ec97f32cde0339069c8a096220c6863d718d305f000357cee68fdfb4e5b36e7cc8a91eb27e7bcd55045b7cf7a77
|
7
|
+
data.tar.gz: d1701b55fe58d9b80378b2a9b09a5c73a0c528895869aa99ecd01ce6d4b291c6108708b5eb6ea55c1201e545c975e85af14489b58272282473451b27a4c2e929
|
data/README.md
CHANGED
@@ -79,11 +79,12 @@ doc.at_css("base").to_s
|
|
79
79
|
|
80
80
|
nokogiri-html-ext will resolve a document's relative URLs against a provided source URL. The source URL _should_ be an absolute URL (e.g. `https://jgarber.example`) representing the location of the document being parsed. The source URL _may_ be any `String` (or any Ruby object that responds to `#to_s`).
|
81
81
|
|
82
|
-
nokogiri-html-ext takes advantage of [the `Nokogiri::XML::Document.parse` method](https://github.com/sparklemotion/nokogiri/blob/main/lib/nokogiri/xml/document.rb#L48)'s second positional argument to set the parsed document's URL.Nokogiri's source code is _very_ complex, but in short: [the `Nokogiri::HTML` method](https://github.com/sparklemotion/nokogiri/blob/main/lib/nokogiri/html.rb#L7-L8) is an alias to [the `Nokogiri::HTML4` method](https://github.com/sparklemotion/nokogiri/blob/main/lib/nokogiri/html4.rb#
|
82
|
+
nokogiri-html-ext takes advantage of [the `Nokogiri::XML::Document.parse` method](https://github.com/sparklemotion/nokogiri/blob/main/lib/nokogiri/xml/document.rb#L48)'s second positional argument to set the parsed document's URL. Nokogiri's source code is _very_ complex, but in short: [the `Nokogiri::HTML` method](https://github.com/sparklemotion/nokogiri/blob/main/lib/nokogiri/html.rb#L7-L8) is an alias to [the `Nokogiri::HTML4` method](https://github.com/sparklemotion/nokogiri/blob/main/lib/nokogiri/html4.rb#L6-L9) which eventually winds its way to the aforementioned `Nokogiri::XML::Document.parse` method. _Phew._ 🥵
|
83
83
|
|
84
|
-
URL resolution uses
|
84
|
+
URL resolution uses [the Addressable gem](https://rubygems.org/gems/addressable)'s URL parsing and normalizing capabilities. Absolute URLs will remain unmodified.
|
85
85
|
|
86
|
-
|
86
|
+
> [!NOTE]
|
87
|
+
> If the document's markup includes a `<base>` element whose `href` attribute is an absolute URL, _that_ URL will take precedence when performing URL resolution.
|
87
88
|
|
88
89
|
An abbreviated example:
|
89
90
|
|
@@ -133,6 +134,9 @@ doc.resolve_relative_url("biz/baz")
|
|
133
134
|
#=> "https://jgarber.example/foo/biz/baz"
|
134
135
|
```
|
135
136
|
|
137
|
+
> [!NOTE]
|
138
|
+
> Nokogiri's default `Nokogiri::HTML` method returns a `Nokogiri::HTML4::Document` which will encode URLs with non-ASCII characters. For example, `☠️.example` will be encoded as `%25E2%2598%25A0%25EF%25B8%258F.example`. For a more consistent experience, use the `Nokogiri::HTML5` method which does not encode URLs in this manner.
|
139
|
+
|
136
140
|
## Acknowledgments
|
137
141
|
|
138
142
|
nokogiri-html-ext wouldn't exist without the [Nokogiri](https://nokogiri.org) project and its [community](https://github.com/sparklemotion/nokogiri).
|
data/lib/nokogiri/html-ext.rb
CHANGED
@@ -28,11 +28,9 @@ module Nokogiri
|
|
28
28
|
|
29
29
|
ATTRIBUTES_XPATHS =
|
30
30
|
URL_ATTRIBUTES_MAP.merge(SRCSET_ATTRIBUTES_MAP).flat_map do |attribute, names|
|
31
|
-
names.map { |name| "
|
31
|
+
names.map { |name| ".//#{name} / @#{attribute}" }
|
32
32
|
end
|
33
33
|
|
34
|
-
URI_PARSER = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::Generic::DEFAULT_PARSER
|
35
|
-
|
36
34
|
# Get the +<base>+ element's HREF attribute value.
|
37
35
|
#
|
38
36
|
# @return [String, nil]
|
@@ -64,15 +62,10 @@ module Nokogiri
|
|
64
62
|
#
|
65
63
|
# @return [String]
|
66
64
|
def resolve_relative_url(url)
|
67
|
-
strs = [
|
68
|
-
|
69
|
-
strs.compact!
|
70
|
-
strs.map! { |str| URI_PARSER.escape(str) }
|
65
|
+
strs = [document.url, base_href, url].compact
|
71
66
|
|
72
|
-
|
73
|
-
|
74
|
-
URI_PARSER.unescape(URI_PARSER.join(*strs).normalize.to_s)
|
75
|
-
rescue URI::InvalidComponentError, URI::InvalidURIError
|
67
|
+
Addressable::URI.join(*strs).to_s
|
68
|
+
rescue Addressable::URI::InvalidURIError
|
76
69
|
url
|
77
70
|
end
|
78
71
|
|
@@ -96,15 +89,12 @@ module Nokogiri
|
|
96
89
|
|
97
90
|
private
|
98
91
|
|
99
|
-
# +
|
100
|
-
#
|
101
|
-
#
|
92
|
+
# Resolve a set of +String+s that represent +srcset+ attribute image
|
93
|
+
# candidate strings.
|
94
|
+
#
|
95
|
+
# @param srcset_attributes [Array<String>]
|
102
96
|
#
|
103
97
|
# @return [String]
|
104
|
-
def doc_url_str
|
105
|
-
@doc_url_str ||= URI_PARSER.unescape(URI_PARSER.unescape(document.url)).strip
|
106
|
-
end
|
107
|
-
|
108
98
|
def resolve_srcset_attributes(srcset_attributes)
|
109
99
|
srcset_attributes.map! do |candidate_string|
|
110
100
|
# rubocop:disable Style/PerlBackrefs
|
data/nokogiri-html-ext.gemspec
CHANGED
@@ -4,7 +4,7 @@ Gem::Specification.new do |spec|
|
|
4
4
|
spec.required_ruby_version = ">= 2.7"
|
5
5
|
|
6
6
|
spec.name = "nokogiri-html-ext"
|
7
|
-
spec.version = "1.
|
7
|
+
spec.version = "1.4.0"
|
8
8
|
spec.authors = ["Jason Garber"]
|
9
9
|
spec.email = ["jason@sixtwothree.org"]
|
10
10
|
|
@@ -28,5 +28,6 @@ Gem::Specification.new do |spec|
|
|
28
28
|
"source_code_uri" => "#{spec.homepage}/src/tag/v#{spec.version}",
|
29
29
|
}
|
30
30
|
|
31
|
+
spec.add_dependency "addressable", "~> 2.8.7"
|
31
32
|
spec.add_dependency "nokogiri", "~> 1.14"
|
32
33
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri-html-ext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jason Garber
|
@@ -9,6 +9,20 @@ bindir: bin
|
|
9
9
|
cert_chain: []
|
10
10
|
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: addressable
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: 2.8.7
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - "~>"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: 2.8.7
|
12
26
|
- !ruby/object:Gem::Dependency
|
13
27
|
name: nokogiri
|
14
28
|
requirement: !ruby/object:Gem::Requirement
|
@@ -41,11 +55,11 @@ licenses:
|
|
41
55
|
- MIT
|
42
56
|
metadata:
|
43
57
|
bug_tracker_uri: https://codeberg.org/jgarber/nokogiri-html-ext/issues
|
44
|
-
changelog_uri: https://codeberg.org/jgarber/nokogiri-html-ext/releases/tag/v1.
|
45
|
-
documentation_uri: https://rubydoc.info/gems/nokogiri-html-ext/1.
|
58
|
+
changelog_uri: https://codeberg.org/jgarber/nokogiri-html-ext/releases/tag/v1.4.0
|
59
|
+
documentation_uri: https://rubydoc.info/gems/nokogiri-html-ext/1.4.0
|
46
60
|
homepage_uri: https://codeberg.org/jgarber/nokogiri-html-ext
|
47
61
|
rubygems_mfa_required: 'true'
|
48
|
-
source_code_uri: https://codeberg.org/jgarber/nokogiri-html-ext/src/tag/v1.
|
62
|
+
source_code_uri: https://codeberg.org/jgarber/nokogiri-html-ext/src/tag/v1.4.0
|
49
63
|
rdoc_options: []
|
50
64
|
require_paths:
|
51
65
|
- lib
|