loofah 2.23.1 → 2.24.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9817bf69ee1ad2d7e93edad53209db1347c9ec360076520532cdf60ad158a8f6
4
- data.tar.gz: 3381ac563f4b70c4ccca49301ba369f22716da405dd7641af535209e6817258b
3
+ metadata.gz: e003942bbd3be7a5d576d4b48002964664c844b3aa66aa24a6b55e536ecd0662
4
+ data.tar.gz: f959fadbc762a26167cc46563d92e8be0d085372467b774dcbb3f18dabc6d8c8
5
5
  SHA512:
6
- metadata.gz: 482eeac3b61aba7e1b517aaab8a6d010ae44bd5b99b749cdabb5381936b4b9e8f97c62d99a60bdf1fbf64c2dba8c33062b676083f4e82d7e07f33e7666fe148f
7
- data.tar.gz: 69234fb9c4d7d55eb63a18008d035df1117bb6ea10f0f15b57bacc5152103fc0830a53a8d98a63e817360332975e257dbeda223ed43e42813724ce30a3b8d7e6
6
+ metadata.gz: 254caef30657885b063ae85c0ac01ba386aa387571a7f5587dfee57f4e803047f56579d11425bb01a6aa50209b3b1755d7e2c613412f4783f3ce5e6d4fd70b70
7
+ data.tar.gz: 4a531c380c0ce45a9c3e9ea1a87c08da487331bf56db20e6bf42b903ca1e1f3e043f72ddfc1ad82076fffcad6bd08055131952b62c40f0b6067fcac3a552cfd7
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.24.0 / 2024-12-24
4
+
5
+ ### Added
6
+
7
+ * Built-in scrubber `:double_breakpoint` which sees `<br><br>` and wraps the surrounding content in `<p>` tags. #279, #284 @josecolella @torihuang
8
+
9
+ ### Improved
10
+
11
+ * Built-in scrubber `:targetblank` now skips `a` tags whose `href` attribute is an anchor link. Previously, all `a` tags were modified to have `target='_blank'`. #291 @fnando
12
+
13
+
3
14
  ## 2.23.1 / 2024-10-25
4
15
 
5
16
  ### Added
data/README.md CHANGED
@@ -31,6 +31,8 @@ Active Record extensions for HTML sanitization are available in the [`loofah-act
31
31
  * Add the _nofollow_ attribute to all hyperlinks.
32
32
  * Add the _target=\_blank_ attribute to all hyperlinks.
33
33
  * Remove _unprintable_ characters from text nodes.
34
+ * Some specialized HTML transformations are also built-in:
35
+ * Where `<br><br>` exists inside a `p` tag, close the `p` and open a new one.
34
36
  * Format markup as plain text, with (or without) sensible whitespace handling around block elements.
35
37
  * Replace Rails's `strip_tags` and `sanitize` view helper methods.
36
38
 
@@ -227,14 +229,15 @@ doc.scrub!(:whitewash) # removes unknown/unsafe/namespaced tags and their chi
227
229
  # and strips all node attributes
228
230
  ```
229
231
 
230
- Loofah also comes with some common transformation tasks:
232
+ Loofah also comes with built-in scrubbers for some common transformation tasks:
231
233
 
232
234
  ``` ruby
233
- doc.scrub!(:nofollow) # adds rel="nofollow" attribute to links
234
- doc.scrub!(:noopener) # adds rel="noopener" attribute to links
235
- doc.scrub!(:noreferrer) # adds rel="noreferrer" attribute to links
236
- doc.scrub!(:unprintable) # removes unprintable characters from text nodes
237
- doc.scrub!(:targetblank) # adds target="_blank" attribute to links
235
+ doc.scrub!(:nofollow) # adds rel="nofollow" attribute to links
236
+ doc.scrub!(:noopener) # adds rel="noopener" attribute to links
237
+ doc.scrub!(:noreferrer) # adds rel="noreferrer" attribute to links
238
+ doc.scrub!(:unprintable) # removes unprintable characters from text nodes
239
+ doc.scrub!(:targetblank) # adds target="_blank" attribute to links
240
+ doc.scrub!(:double_breakpoint) # where `<br><br>` appears in a `p` tag, close the `p` and open a new one
238
241
  ```
239
242
 
240
243
  See `Loofah::Scrubbers` for more details and example usage.
@@ -251,7 +251,9 @@ module Loofah
251
251
  def scrub(node)
252
252
  return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
253
253
 
254
- node.set_attribute("target", "_blank")
254
+ href = node["href"]
255
+
256
+ node.set_attribute("target", "_blank") if href && href[0] != "#"
255
257
 
256
258
  STOP
257
259
  end
@@ -348,6 +350,57 @@ module Loofah
348
350
  end
349
351
  end
350
352
 
353
+ #
354
+ # === scrub!(:double_breakpoint)
355
+ #
356
+ # +:double_breakpoint+ replaces double-break tags with closing/opening paragraph tags.
357
+ #
358
+ # markup = "<p>Some text here in a logical paragraph.<br><br>Some more text, apparently a second paragraph.</p>"
359
+ # Loofah.html5_fragment(markup).scrub!(:double_breakpoint)
360
+ # => "<p>Some text here in a logical paragraph.</p><p>Some more text, apparently a second paragraph.</p>"
361
+ #
362
+ class DoubleBreakpoint < Scrubber
363
+ def initialize # rubocop:disable Lint/MissingSuper
364
+ @direction = :top_down
365
+ end
366
+
367
+ def scrub(node)
368
+ return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "p")
369
+
370
+ paragraph_with_break_point_nodes = node.xpath("//p[br[following-sibling::br]]")
371
+
372
+ paragraph_with_break_point_nodes.each do |paragraph_node|
373
+ new_paragraph = paragraph_node.add_previous_sibling("<p>").first
374
+
375
+ paragraph_node.children.each do |child|
376
+ remove_blank_text_nodes(child)
377
+ end
378
+
379
+ paragraph_node.children.each do |child|
380
+ # already unlinked
381
+ next if child.parent.nil?
382
+
383
+ if child.name == "br" && child.next_sibling.name == "br"
384
+ new_paragraph = paragraph_node.add_previous_sibling("<p>").first
385
+ child.next_sibling.unlink
386
+ child.unlink
387
+ else
388
+ child.parent = new_paragraph
389
+ end
390
+ end
391
+
392
+ paragraph_node.unlink
393
+ end
394
+
395
+ CONTINUE
396
+ end
397
+
398
+ private
399
+
400
+ def remove_blank_text_nodes(node)
401
+ node.unlink if node.text? && node.blank?
402
+ end
403
+ end
351
404
  #
352
405
  # A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
353
406
  #
@@ -362,6 +415,7 @@ module Loofah
362
415
  targetblank: TargetBlank,
363
416
  newline_block_elements: NewlineBlockElements,
364
417
  unprintable: Unprintable,
418
+ double_breakpoint: DoubleBreakpoint,
365
419
  }
366
420
 
367
421
  class << self
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Loofah
4
4
  # The version of Loofah you are using
5
- VERSION = "2.23.1"
5
+ VERSION = "2.24.0"
6
6
  end
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: loofah
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.23.1
4
+ version: 2.24.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Dalessio
8
8
  - Bryan Helmkamp
9
- autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2024-10-25 00:00:00.000000000 Z
11
+ date: 2025-01-01 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: crass
@@ -82,7 +81,7 @@ metadata:
82
81
  bug_tracker_uri: https://github.com/flavorjones/loofah/issues
83
82
  changelog_uri: https://github.com/flavorjones/loofah/blob/main/CHANGELOG.md
84
83
  documentation_uri: https://www.rubydoc.info/gems/loofah/
85
- post_install_message:
84
+ funding_uri: https://github.com/sponsors/flavorjones
86
85
  rdoc_options: []
87
86
  require_paths:
88
87
  - lib
@@ -97,8 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
97
96
  - !ruby/object:Gem::Version
98
97
  version: '0'
99
98
  requirements: []
100
- rubygems_version: 3.5.22
101
- signing_key:
99
+ rubygems_version: 3.6.2
102
100
  specification_version: 4
103
101
  summary: Loofah is a general library for manipulating and transforming HTML/XML documents
104
102
  and fragments, built on top of Nokogiri.