loofah 2.13.0 → 2.14.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +4 -5
- data/lib/loofah/elements.rb +5 -2
- data/lib/loofah/instance_methods.rb +4 -4
- data/lib/loofah/scrubbers.rb +7 -2
- data/lib/loofah/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 95836cdfac672774704d62557836c319edf8c1ffd17323b61c749aabbb71f6b6
|
4
|
+
data.tar.gz: ddc0bc8f3dc588cc4a69651debacb404cedc02e047f585bde0758ec4186d6018
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 05b043ea1e8de851488dc084cae3f596816b54549cfc4f9a1d5fd675093d8dc06956e6880286adcdcffe5aca6d29e46a97052cc526cc7b3667277ff8a9575ba7
|
7
|
+
data.tar.gz: fd0fbee37a8150709ba178889d96532a4b2efdd402ccc2d922c2022a88f04188ebc221037d522cf7af67eaf3f1c4d74acba1de37ca2f70e1d0214afff69f6f85
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 2.14.0 / 2022-02-11
|
4
|
+
|
5
|
+
### Features
|
6
|
+
|
7
|
+
* The `#to_text` method on `Loofah::HTML::{Document,DocumentFragment}` replaces `<br>` line break elements with a newline. [[#225](https://github.com/flavorjones/loofah/issues/225)]
|
8
|
+
|
9
|
+
|
3
10
|
## 2.13.0 / 2021-12-10
|
4
11
|
|
5
12
|
### Bug fixes
|
data/README.md
CHANGED
@@ -133,13 +133,12 @@ and `text` to return plain text:
|
|
133
133
|
doc.text # => "ohai! div is safe "
|
134
134
|
```
|
135
135
|
|
136
|
-
Also, `to_text` is available, which does the right thing with
|
137
|
-
whitespace around block-level elements.
|
136
|
+
Also, `to_text` is available, which does the right thing with whitespace around block-level and line break elements.
|
138
137
|
|
139
138
|
``` ruby
|
140
|
-
doc = Loofah.fragment("<h1>Title</h1><div>Content</div>")
|
141
|
-
doc.text # => "
|
142
|
-
doc.to_text # => "\nTitle\n\nContent\n"
|
139
|
+
doc = Loofah.fragment("<h1>Title</h1><div>Content<br>Next line</div>")
|
140
|
+
doc.text # => "TitleContentNext line" # probably not what you want
|
141
|
+
doc.to_text # => "\nTitle\n\nContent\nNext line\n" # better
|
143
142
|
```
|
144
143
|
|
145
144
|
### Loofah::XML::Document and Loofah::XML::DocumentFragment
|
data/lib/loofah/elements.rb
CHANGED
@@ -70,8 +70,6 @@ module Loofah
|
|
70
70
|
video
|
71
71
|
]
|
72
72
|
|
73
|
-
STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
|
74
|
-
|
75
73
|
# The following elements may also be considered block-level
|
76
74
|
# elements since they may contain block-level elements
|
77
75
|
LOOSE_BLOCK_LEVEL = Set.new %w[dd
|
@@ -86,7 +84,12 @@ module Loofah
|
|
86
84
|
tr
|
87
85
|
]
|
88
86
|
|
87
|
+
# Elements that aren't block but should generate a newline in #to_text
|
88
|
+
INLINE_LINE_BREAK = Set.new(["br"])
|
89
|
+
|
90
|
+
STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
|
89
91
|
BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
|
92
|
+
LINEBREAKERS = BLOCK_LEVEL + INLINE_LINE_BREAK
|
90
93
|
end
|
91
94
|
|
92
95
|
::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants ::Loofah::Elements
|
@@ -112,11 +112,11 @@ module Loofah
|
|
112
112
|
# Returns a plain-text version of the markup contained by the
|
113
113
|
# fragment, with HTML entities encoded.
|
114
114
|
#
|
115
|
-
# This method is slower than #
|
116
|
-
# whitespace around block elements.
|
115
|
+
# This method is slower than #text, but is clever about
|
116
|
+
# whitespace around block elements and line break elements.
|
117
117
|
#
|
118
|
-
# Loofah.document("<h1>Title</h1><div>Content</div>").to_text
|
119
|
-
# # => "\nTitle\n\nContent\n"
|
118
|
+
# Loofah.document("<h1>Title</h1><div>Content<br>Next line</div>").to_text
|
119
|
+
# # => "\nTitle\n\nContent\nNext line\n"
|
120
120
|
#
|
121
121
|
def to_text(options = {})
|
122
122
|
Loofah.remove_extraneous_whitespace self.dup.scrub!(:newline_block_elements).text(options)
|
data/lib/loofah/scrubbers.rb
CHANGED
@@ -240,8 +240,13 @@ module Loofah
|
|
240
240
|
end
|
241
241
|
|
242
242
|
def scrub(node)
|
243
|
-
return CONTINUE unless Loofah::Elements::
|
244
|
-
|
243
|
+
return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
|
244
|
+
replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
|
245
|
+
"\n"
|
246
|
+
else
|
247
|
+
"\n#{node.content}\n"
|
248
|
+
end
|
249
|
+
node.add_next_sibling Nokogiri::XML::Text.new(replacement, node.document)
|
245
250
|
node.remove
|
246
251
|
end
|
247
252
|
end
|
data/lib/loofah/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: loofah
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Dalessio
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2022-02-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: crass
|
@@ -199,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
199
199
|
- !ruby/object:Gem::Version
|
200
200
|
version: '0'
|
201
201
|
requirements: []
|
202
|
-
rubygems_version: 3.
|
202
|
+
rubygems_version: 3.3.5
|
203
203
|
signing_key:
|
204
204
|
specification_version: 4
|
205
205
|
summary: Loofah is a general library for manipulating and transforming HTML/XML documents
|