loofah 2.3.1 → 2.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +193 -40
  3. data/README.md +13 -12
  4. data/lib/loofah/elements.rb +79 -75
  5. data/lib/loofah/helpers.rb +5 -4
  6. data/lib/loofah/html/document.rb +1 -0
  7. data/lib/loofah/html/document_fragment.rb +4 -2
  8. data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
  9. data/lib/loofah/html5/safelist.rb +273 -27
  10. data/lib/loofah/html5/scrub.rb +147 -52
  11. data/lib/loofah/instance_methods.rb +14 -8
  12. data/lib/loofah/metahelpers.rb +2 -1
  13. data/lib/loofah/scrubber.rb +12 -7
  14. data/lib/loofah/scrubbers.rb +20 -18
  15. data/lib/loofah/version.rb +5 -0
  16. data/lib/loofah/xml/document.rb +1 -0
  17. data/lib/loofah/xml/document_fragment.rb +2 -1
  18. data/lib/loofah.rb +33 -16
  19. metadata +45 -125
  20. data/.gemtest +0 -0
  21. data/Gemfile +0 -22
  22. data/Manifest.txt +0 -41
  23. data/Rakefile +0 -81
  24. data/benchmark/benchmark.rb +0 -149
  25. data/benchmark/fragment.html +0 -96
  26. data/benchmark/helper.rb +0 -73
  27. data/benchmark/www.slashdot.com.html +0 -2560
  28. data/test/assets/msword.html +0 -63
  29. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  30. data/test/helper.rb +0 -18
  31. data/test/html5/test_sanitizer.rb +0 -401
  32. data/test/html5/test_scrub.rb +0 -10
  33. data/test/integration/test_ad_hoc.rb +0 -220
  34. data/test/integration/test_helpers.rb +0 -43
  35. data/test/integration/test_html.rb +0 -72
  36. data/test/integration/test_scrubbers.rb +0 -400
  37. data/test/integration/test_xml.rb +0 -55
  38. data/test/unit/test_api.rb +0 -142
  39. data/test/unit/test_encoding.rb +0 -20
  40. data/test/unit/test_helpers.rb +0 -62
  41. data/test/unit/test_scrubber.rb +0 -229
  42. data/test/unit/test_scrubbers.rb +0 -14
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1196afab25d29644d1961e4516ac317a2c38dee3295f35354c468e6a9318fa55
4
- data.tar.gz: 2e07ff641edb37d2b0dce2933288da4667d4b680a586912af9c171db7dfb0a63
3
+ metadata.gz: bd3edb0acdf2359d82564aca0bc13710d9f6c49157963d18953ff55bd7c14413
4
+ data.tar.gz: 3a6e11b7deb9cfb469aaf6ec919062687bd4215ef11980bded72ca298807610c
5
5
  SHA512:
6
- metadata.gz: 37ac2cdb0d136da417cff62e3845c5b71769f044d8150c636a549dc9ca4cf98bcef4c6d2b6e653eff56922b95d812ed39310a406c49366c14791456ca905e8fe
7
- data.tar.gz: 0fa3cdd75a3d2950801a1cfe7f8d4cad6bb73bbec67d24ba25980c09a565f6c95c5d664c1789ccd62486d1917c685a5b0f762cc073a054bbb0f02fb0222688f0
6
+ metadata.gz: 4970a6aa72265f60556dd6fd254375c86d3f83be23f3bbcc8b04df00ce0e801e8ef9e67d0a77ca6a21915be89226131c16a7f3540f02538cc2b9a369950dfebf
7
+ data.tar.gz: 27e3a06cc391ec3d9e3c966efdb6b4ce58e98c397ec87490d418406c17757e5cb0193edabaced30a9f24320c729e6730308e346610859f9f7c6d5fcc6f72cd56
data/CHANGELOG.md CHANGED
@@ -1,29 +1,182 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.19.1 / 2022-12-13
4
+
5
+ ### Security
6
+
7
+ * Address CVE-2022-23514, inefficient regular expression complexity. See [GHSA-486f-hjj9-9vhh](https://github.com/flavorjones/loofah/security/advisories/GHSA-486f-hjj9-9vhh) for more information.
8
+ * Address CVE-2022-23515, improper neutralization of data URIs. See [GHSA-228g-948r-83gx](https://github.com/flavorjones/loofah/security/advisories/GHSA-228g-948r-83gx) for more information.
9
+ * Address CVE-2022-23516, uncontrolled recursion. See [GHSA-3x8r-x6xp-q4vm](https://github.com/flavorjones/loofah/security/advisories/GHSA-3x8r-x6xp-q4vm) for more information.
10
+
11
+
12
+ ## 2.19.0 / 2022-09-14
13
+
14
+ ### Features
15
+
16
+ * Allow SVG 1.0 color keyword names in CSS attributes. These colors are part of the [CSS Color Module Level 3](https://www.w3.org/TR/css-color-3/#svg-color) recommendation released 2022-01-18. [[#243](https://github.com/flavorjones/loofah/issues/243)]
17
+
18
+
19
+ ## 2.18.0 / 2022-05-11
20
+
21
+ ### Features
22
+
23
+ * Allow CSS property `aspect-ratio`. [[#236](https://github.com/flavorjones/loofah/issues/236)] (Thanks, [@louim](https://github.com/louim)!)
24
+
25
+
26
+ ## 2.17.0 / 2022-04-28
27
+
28
+ ### Features
29
+
30
+ * Allow ARIA attributes. [[#232](https://github.com/flavorjones/loofah/issues/232), [#233](https://github.com/flavorjones/loofah/issues/233)] (Thanks, [@nick-desteffen](https://github.com/nick-desteffen)!)
31
+
32
+
33
+ ## 2.16.0 / 2022-04-01
34
+
35
+ ### Features
36
+
37
+ * Allow MathML elements `menclose` and `ms`, and MathML attributes `dir`, `href`, `lquote`, `mathsize`, `notation`, and `rquote`. [[#231](https://github.com/flavorjones/loofah/issues/231)] (Thanks, [@nick-desteffen](https://github.com/nick-desteffen)!)
38
+
39
+
40
+ ## 2.15.0 / 2022-03-14
41
+
42
+ ### Features
43
+
44
+ * Expand set of allowed protocols to include `sms:`. [[#228](https://github.com/flavorjones/loofah/issues/228)] (Thanks, [@brendon](https://github.com/brendon)!)
45
+
46
+
47
+ ## 2.14.0 / 2022-02-11
48
+
49
+ ### Features
50
+
51
+ * The `#to_text` method on `Loofah::HTML::{Document,DocumentFragment}` replaces `<br>` line break elements with a newline. [[#225](https://github.com/flavorjones/loofah/issues/225)]
52
+
53
+
54
+ ## 2.13.0 / 2021-12-10
55
+
56
+ ### Bug fixes
57
+
58
+ * Loofah::HTML::DocumentFragment#text no longer serializes top-level comment children. [[#221](https://github.com/flavorjones/loofah/issues/221)]
59
+
60
+
61
+ ## 2.12.0 / 2021-08-11
62
+
63
+ ### Features
64
+
65
+ * Support empty HTML5 data attributes. [[#215](https://github.com/flavorjones/loofah/issues/215)]
66
+
67
+
68
+ ## 2.11.0 / 2021-07-31
69
+
70
+ ### Features
71
+
72
+ * Allow HTML5 element `wbr`.
73
+ * Allow all CSS property values for `border-collapse`. [[#201](https://github.com/flavorjones/loofah/issues/201)]
74
+
75
+
76
+ ### Changes
77
+
78
+ * Deprecating `Loofah::HTML5::SafeList::VOID_ELEMENTS` which is not a canonical list of void HTML4 or HTML5 elements.
79
+ * Removed some elements from `Loofah::HTML5::SafeList::VOID_ELEMENTS` that either are not acceptable elements or aren't considered "void" by libxml2.
80
+
81
+
82
+ ## 2.10.0 / 2021-06-06
83
+
84
+ ### Features
85
+
86
+ * Allow CSS properties `overflow-x` and `overflow-y`. [[#206](https://github.com/flavorjones/loofah/issues/206)] (Thanks, [@sampokuokkanen](https://github.com/sampokuokkanen)!)
87
+
88
+
89
+ ## 2.9.1 / 2021-04-07
90
+
91
+ ### Bug fixes
92
+
93
+ * Fix a regression in v2.9.0 which inappropriately removed CSS properties with quoted string values. [[#202](https://github.com/flavorjones/loofah/issues/202)]
94
+
95
+
96
+ ## 2.9.0 / 2021-01-14
97
+
98
+ ### Features
99
+
100
+ * Handle CSS functions in a CSS shorthand property (like `background`). [[#199](https://github.com/flavorjones/loofah/issues/199), [#200](https://github.com/flavorjones/loofah/issues/200)]
101
+
102
+
103
+ ## 2.8.0 / 2020-11-25
104
+
105
+ ### Features
106
+
107
+ * Allow CSS properties `order`, `flex-direction`, `flex-grow`, `flex-wrap`, `flex-shrink`, `flex-flow`, `flex-basis`, `flex`, `justify-content`, `align-self`, `align-items`, and `align-content`. [[#197](https://github.com/flavorjones/loofah/issues/197)] (Thanks, [@miguelperez](https://github.com/miguelperez)!)
108
+
109
+
110
+ ## 2.7.0 / 2020-08-26
111
+
112
+ ### Features
113
+
114
+ * Allow CSS properties `page-break-before`, `page-break-inside`, and `page-break-after`. [[#190](https://github.com/flavorjones/loofah/issues/190)] (Thanks, [@ahorek](https://github.com/ahorek)!)
115
+
116
+
117
+ ### Fixes
118
+
119
+ * Don't drop the `!important` rule from some CSS properties. [[#191](https://github.com/flavorjones/loofah/issues/191)] (Thanks, [@b7kich](https://github.com/b7kich)!)
120
+
121
+
122
+ ## 2.6.0 / 2020-06-16
123
+
124
+ ### Features
125
+
126
+ * Allow CSS `border-style` keywords. [[#188](https://github.com/flavorjones/loofah/issues/188)] (Thanks, [@tarcisiozf](https://github.com/tarcisiozf)!)
127
+
128
+
129
+ ## 2.5.0 / 2020-04-05
130
+
131
+ ### Features
132
+
133
+ * Allow more CSS length units: "ch", "vw", "vh", "Q", "lh", "vmin", "vmax". [[#178](https://github.com/flavorjones/loofah/issues/178)] (Thanks, [@JuanitoFatas](https://github.com/JuanitoFatas)!)
134
+
135
+
136
+ ### Fixes
137
+
138
+ * Remove comments from `Loofah::HTML::Document`s that exist outside the `html` element. [[#80](https://github.com/flavorjones/loofah/issues/80)]
139
+
140
+
141
+ ### Other changes
142
+
143
+ * Gem metadata being set [[#181](https://github.com/flavorjones/loofah/issues/181)] (Thanks, [@JuanitoFatas](https://github.com/JuanitoFatas)!)
144
+ * Test files removed from gem file [[#180](https://github.com/flavorjones/loofah/issues/180),[#166](https://github.com/flavorjones/loofah/issues/166),[#159](https://github.com/flavorjones/loofah/issues/159)] (Thanks, [@JuanitoFatas](https://github.com/JuanitoFatas) and [@greysteil](https://github.com/greysteil)!)
145
+
146
+
147
+ ## 2.4.0 / 2019-11-25
148
+
149
+ ### Features
150
+
151
+ * Allow CSS property `max-width` [[#175](https://github.com/flavorjones/loofah/issues/175)] (Thanks, [@bchaney](https://github.com/bchaney)!)
152
+ * Allow CSS sizes expressed in `rem` [[#176](https://github.com/flavorjones/loofah/issues/176), [#177](https://github.com/flavorjones/loofah/issues/177)]
153
+ * Add `frozen_string_literal: true` magic comment to all `lib` files. [[#118](https://github.com/flavorjones/loofah/issues/118)]
154
+
155
+
3
156
  ## 2.3.1 / 2019-10-22
4
157
 
5
158
  ### Security
6
159
 
7
160
  Address CVE-2019-15587: Unsanitized JavaScript may occur in sanitized output when a crafted SVG element is republished.
8
161
 
9
- This CVE's public notice is at https://github.com/flavorjones/loofah/issues/171
162
+ This CVE's public notice is at [#171](https://github.com/flavorjones/loofah/issues/171)
10
163
 
11
164
 
12
165
  ## 2.3.0 / 2019-09-28
13
166
 
14
167
  ### Features
15
168
 
16
- * Expand set of allowed protocols to include `tel:` and `line:`. [#104, #147]
17
- * Expand set of allowed CSS functions. [related to #122]
18
- * Allow greater precision in shorthand CSS values. [#149] (Thanks, @danfstucky!)
19
- * Allow CSS property `list-style` [#162] (Thanks, @jaredbeck!)
20
- * Allow CSS keywords `thick` and `thin` [#168] (Thanks, @georgeclaghorn!)
21
- * Allow HTML property `contenteditable` [#167] (Thanks, @andreynering!)
169
+ * Expand set of allowed protocols to include `tel:` and `line:`. [[#104](https://github.com/flavorjones/loofah/issues/104), [#147](https://github.com/flavorjones/loofah/issues/147)]
170
+ * Expand set of allowed CSS functions. [related to [#122](https://github.com/flavorjones/loofah/issues/122)]
171
+ * Allow greater precision in shorthand CSS values. [[#149](https://github.com/flavorjones/loofah/issues/149)] (Thanks, [@danfstucky](https://github.com/danfstucky)!)
172
+ * Allow CSS property `list-style` [[#162](https://github.com/flavorjones/loofah/issues/162)] (Thanks, [@jaredbeck](https://github.com/jaredbeck)!)
173
+ * Allow CSS keywords `thick` and `thin` [[#168](https://github.com/flavorjones/loofah/issues/168)] (Thanks, [@georgeclaghorn](https://github.com/georgeclaghorn)!)
174
+ * Allow HTML property `contenteditable` [[#167](https://github.com/flavorjones/loofah/issues/167)] (Thanks, [@andreynering](https://github.com/andreynering)!)
22
175
 
23
176
 
24
177
  ### Bug fixes
25
178
 
26
- * CSS hex values are no longer limited to lowercase hex. Previously uppercase hex were scrubbed. [#165] (Thanks, @asok!)
179
+ * CSS hex values are no longer limited to lowercase hex. Previously uppercase hex were scrubbed. [[#165](https://github.com/flavorjones/loofah/issues/165)] (Thanks, [@asok](https://github.com/asok)!)
27
180
 
28
181
 
29
182
  ### Deprecations / Name Changes
@@ -34,7 +187,7 @@ The following method and constants are hereby deprecated, and will be completely
34
187
  * Deprecate `Loofah::Helpers::ActionView::WhiteListSanitizer`, please use `Loofah::Helpers::ActionView::SafeListSanitizer` instead.
35
188
  * Deprecate `Loofah::HTML5::WhiteList`, please use `Loofah::HTML5::SafeList` instead.
36
189
 
37
- Thanks to @JuanitoFatas for submitting these changes in #164 and for making the language used in Loofah more inclusive.
190
+ Thanks to [@JuanitoFatas](https://github.com/JuanitoFatas) for submitting these changes in [#164](https://github.com/flavorjones/loofah/issues/164) and for making the language used in Loofah more inclusive.
38
191
 
39
192
 
40
193
  ## 2.2.3 / 2018-10-30
@@ -43,7 +196,7 @@ Thanks to @JuanitoFatas for submitting these changes in #164 and for making the
43
196
 
44
197
  Address CVE-2018-16468: Unsanitized JavaScript may occur in sanitized output when a crafted SVG element is republished.
45
198
 
46
- This CVE's public notice is at https://github.com/flavorjones/loofah/issues/154
199
+ This CVE's public notice is at [#154](https://github.com/flavorjones/loofah/issues/154)
47
200
 
48
201
 
49
202
  ## Meta / 2018-10-27
@@ -70,76 +223,76 @@ attribute scrubbers should they need to address CVE-2018-8048.
70
223
 
71
224
  Addresses CVE-2018-8048. Loofah allowed non-whitelisted attributes to be present in sanitized output when input with specially-crafted HTML fragments.
72
225
 
73
- This CVE's public notice is at https://github.com/flavorjones/loofah/issues/144
226
+ This CVE's public notice is at [#144](https://github.com/flavorjones/loofah/issues/144)
74
227
 
75
228
 
76
229
  ## 2.2.0 / 2018-02-11
77
230
 
78
231
  ### Features:
79
232
 
80
- * Support HTML5 `<main>` tag. #133 (Thanks, @MothOnMars!)
81
- * Recognize HTML5 block elements. #136 (Thanks, @MothOnMars!)
82
- * Support SVG `<symbol>` tag. #131 (Thanks, @baopham!)
83
- * Support for whitelisting CSS functions, initially just `calc` and `rgb`. #122/#123/#129 (Thanks, @NikoRoberts!)
84
- * Whitelist CSS property `list-style-type`. #68/#137/#142 (Thanks, @andela-ysanni and @NikoRoberts!)
233
+ * Support HTML5 `<main>` tag. [#133](https://github.com/flavorjones/loofah/issues/133) (Thanks, [@MothOnMars](https://github.com/MothOnMars)!)
234
+ * Recognize HTML5 block elements. [#136](https://github.com/flavorjones/loofah/issues/136) (Thanks, [@MothOnMars](https://github.com/MothOnMars)!)
235
+ * Support SVG `<symbol>` tag. [#131](https://github.com/flavorjones/loofah/issues/131) (Thanks, [@baopham](https://github.com/baopham)!)
236
+ * Support for whitelisting CSS functions, initially just `calc` and `rgb`. [#122](https://github.com/flavorjones/loofah/issues/122)/[#123](https://github.com/flavorjones/loofah/issues/123)/[#129](https://github.com/flavorjones/loofah/issues/129) (Thanks, [@NikoRoberts](https://github.com/NikoRoberts)!)
237
+ * Whitelist CSS property `list-style-type`. [#68](https://github.com/flavorjones/loofah/issues/68)/[#137](https://github.com/flavorjones/loofah/issues/137)/[#142](https://github.com/flavorjones/loofah/issues/142) (Thanks, [@andela-ysanni](https://github.com/andela-ysanni) and [@NikoRoberts](https://github.com/NikoRoberts)!)
85
238
 
86
239
  ### Bugfixes:
87
240
 
88
- * Properly handle nested `script` tags. #127.
241
+ * Properly handle nested `script` tags. [#127](https://github.com/flavorjones/loofah/issues/127).
89
242
 
90
243
 
91
244
  ## 2.1.1 / 2017-09-24
92
245
 
93
246
  ### Bugfixes:
94
247
 
95
- * Removed warning for unused variable. #124 (Thanks, @y-yagi!)
248
+ * Removed warning for unused variable. [#124](https://github.com/flavorjones/loofah/issues/124) (Thanks, [@y-yagi](https://github.com/y-yagi)!)
96
249
 
97
250
 
98
251
  ## 2.1.0 / 2017-09-24
99
252
 
100
253
  ### Notes:
101
254
 
102
- * Re-implemented CSS parsing and sanitization using the [crass](https://github.com/rgrove/crass) library. #91
255
+ * Re-implemented CSS parsing and sanitization using the [crass](https://github.com/rgrove/crass) library. [#91](https://github.com/flavorjones/loofah/issues/91)
103
256
 
104
257
 
105
258
  ### Features:
106
259
 
107
- * Added :noopener HTML scrubber (Thanks, @tastycode!)
108
- * Support `data` URIs with the following media types: text/plain, text/css, image/png, image/gif, image/jpeg, image/svg+xml. #101, #120. (Thanks, @mrpasquini!)
260
+ * Added :noopener HTML scrubber (Thanks, [@tastycode](https://github.com/tastycode)!)
261
+ * Support `data` URIs with the following media types: text/plain, text/css, image/png, image/gif, image/jpeg, image/svg+xml. [#101](https://github.com/flavorjones/loofah/issues/101), [#120](https://github.com/flavorjones/loofah/issues/120). (Thanks, [@mrpasquini](https://github.com/mrpasquini)!)
109
262
 
110
263
 
111
264
  ### Bugfixes:
112
265
 
113
- * The :unprintable scrubber now scrubs unprintable characters in CDATA nodes (like `<script>`). #124
114
- * Allow negative values in CSS properties. Restores functionality that was reverted in v2.0.3. #91
266
+ * The :unprintable scrubber now scrubs unprintable characters in CDATA nodes (like `<script>`). [#124](https://github.com/flavorjones/loofah/issues/124)
267
+ * Allow negative values in CSS properties. Restores functionality that was reverted in v2.0.3. [#91](https://github.com/flavorjones/loofah/issues/91)
115
268
 
116
269
 
117
270
  ## 2.0.3 / 2015-08-17
118
271
 
119
272
  ### Bug fixes:
120
273
 
121
- * Revert support for negative values in CSS properties due to slow performance. #90 (Related to #85.)
274
+ * Revert support for negative values in CSS properties due to slow performance. [#90](https://github.com/flavorjones/loofah/issues/90) (Related to [#85](https://github.com/flavorjones/loofah/issues/85).)
122
275
 
123
276
 
124
277
  ## 2.0.2 / 2015-05-05
125
278
 
126
279
  ### Bug fixes:
127
280
 
128
- * Fix error with `#to_text` when Loofah::Helpers hadn't been required. #75
129
- * Allow multi-word data attributes. #84 (Thanks, @jstorimer!)
130
- * Allow negative values in CSS properties. #85 (Thanks, @siddhartham!)
281
+ * Fix error with `#to_text` when Loofah::Helpers hadn't been required. [#75](https://github.com/flavorjones/loofah/issues/75)
282
+ * Allow multi-word data attributes. [#84](https://github.com/flavorjones/loofah/issues/84) (Thanks, [@jstorimer](https://github.com/jstorimer)!)
283
+ * Allow negative values in CSS properties. [#85](https://github.com/flavorjones/loofah/issues/85) (Thanks, [@siddhartham](https://github.com/siddhartham)!)
131
284
 
132
285
 
133
286
  ## 2.0.1 / 2014-08-21
134
287
 
135
288
  ### Bug fixes:
136
289
 
137
- * Load RR correctly when running test files directly. (Thanks, @ktdreyer!)
290
+ * Load RR correctly when running test files directly. (Thanks, [@ktdreyer](https://github.com/ktdreyer)!)
138
291
 
139
292
 
140
293
  ### Notes:
141
294
 
142
- * Extracted HTML5::Scrub#scrub_css_attribute to accommodate the Rails integration work. (Thanks, @kaspth!)
295
+ * Extracted HTML5::Scrub#scrub_css_attribute to accommodate the Rails integration work. (Thanks, [@kaspth](https://github.com/kaspth)!)
143
296
 
144
297
 
145
298
  ## 2.0.0 / 2014-05-09
@@ -155,19 +308,19 @@ This CVE's public notice is at https://github.com/flavorjones/loofah/issues/144
155
308
  * tags: `article`, `aside`, `bdi`, `bdo`, `canvas`, `command`, `datalist`, `details`, `figcaption`, `figure`, `footer`, `header`, `mark`, `meter`, `nav`, `output`, `section`, `summary`, `time`
156
309
  * attributes: `data-*` (Thanks, Rafael Franca!)
157
310
  * URI attributes: `poster` and `preload`
158
- * Addition of the `:unprintable` scrubber to remove unprintable characters from text nodes. #65 (Thanks, Matt Swanson!)
159
- * `Loofah.fragment` accepts an optional encoding argument, compatible with `Nokogiri::HTML::DocumentFragment.parse`. #62 (Thanks, Ben Atkins!)
311
+ * Addition of the `:unprintable` scrubber to remove unprintable characters from text nodes. [#65](https://github.com/flavorjones/loofah/issues/65) (Thanks, Matt Swanson!)
312
+ * `Loofah.fragment` accepts an optional encoding argument, compatible with `Nokogiri::HTML::DocumentFragment.parse`. [#62](https://github.com/flavorjones/loofah/issues/62) (Thanks, Ben Atkins!)
160
313
  * HTML5 sanitizers now remove attributes without values. (Thanks, Kasper Timm Hansen!)
161
314
 
162
315
  ### Bug fixes:
163
316
 
164
317
  * HTML5 sanitizers' CSS keyword check now actually works (broken in v2.0). Additional regression tests added. (Thanks, Kasper Timm Hansen!)
165
- * HTML5 sanitizers now allow negative arguments to CSS. #64 (Thanks, Jon Calhoun!)
318
+ * HTML5 sanitizers now allow negative arguments to CSS. [#64](https://github.com/flavorjones/loofah/issues/64) (Thanks, Jon Calhoun!)
166
319
 
167
320
 
168
321
  ## 1.2.1 (2012-04-14)
169
322
 
170
- * Declaring encoding in html5/scrub.rb. Without this, use of the ruby -KU option would cause havoc. (#32)
323
+ * Declaring encoding in html5/scrub.rb. Without this, use of the ruby -KU option would cause havoc. ([#32](https://github.com/flavorjones/loofah/issues/32))
171
324
 
172
325
 
173
326
  ## 1.2.0 (2011-08-08)
@@ -185,7 +338,7 @@ This CVE's public notice is at https://github.com/flavorjones/loofah/issues/144
185
338
  * Additional HTML5lib whitelist elements (from html5lib 1524:80b5efe26230).
186
339
  Up to date with HTML5lib ruby code as of 1723:7ee6a0331856.
187
340
  * Whitelists (which are not part of the public API) are now Sets (were previously Arrays).
188
- * Don't explode when encountering UTF-8 URIs. (#25, #29)
341
+ * Don't explode when encountering UTF-8 URIs. ([#25](https://github.com/flavorjones/loofah/issues/25), [#29](https://github.com/flavorjones/loofah/issues/29))
189
342
 
190
343
 
191
344
  ## 1.0.0 (2010-10-26)
@@ -203,7 +356,7 @@ This CVE's public notice is at https://github.com/flavorjones/loofah/issues/144
203
356
  * New methods Loofah::HTML::Document#to_text and
204
357
  Loofah::HTML::DocumentFragment#to_text do the right thing with
205
358
  whitespace. Note that these methods are significantly slower than
206
- #text. GH #12
359
+ #text. GH [#12](https://github.com/flavorjones/loofah/issues/12)
207
360
  * Loofah::Elements::BLOCK_LEVEL contains a canonical list of HTML4 block-level4 elements.
208
361
  * Loofah::HTML::Document#text and Loofah::HTML::DocumentFragment#text
209
362
  will return unescaped HTML entities by passing :encode_special_chars => false.
@@ -217,7 +370,7 @@ This CVE's public notice is at https://github.com/flavorjones/loofah/issues/144
217
370
 
218
371
  ### Bug fixes:
219
372
 
220
- * Loofah::XssFoliate was not properly escaping HTML entities when implicitly scrubbing a string attribute. GH #17
373
+ * Loofah::XssFoliate was not properly escaping HTML entities when implicitly scrubbing a string attribute. GH [#17](https://github.com/flavorjones/loofah/issues/17)
221
374
 
222
375
 
223
376
  ## 0.4.3 (2010-01-29)
@@ -245,7 +398,7 @@ This CVE's public notice is at https://github.com/flavorjones/loofah/issues/144
245
398
 
246
399
  ### Bug fixes:
247
400
 
248
- * Supporting Rails apps that aren't loading ActiveRecord. GH #10
401
+ * Supporting Rails apps that aren't loading ActiveRecord. GH [#10](https://github.com/flavorjones/loofah/issues/10)
249
402
 
250
403
  ### Miscellaneous:
251
404
 
@@ -306,13 +459,13 @@ This CVE's public notice is at https://github.com/flavorjones/loofah/issues/144
306
459
  ### Enhancements:
307
460
 
308
461
  * when loaded in a Rails app, automatically extend ActiveRecord::Base
309
- with html_fragment and html_document. GH #6 (Thanks Josh Nichols!)
462
+ with html_fragment and html_document. GH [#6](https://github.com/flavorjones/loofah/issues/6) (Thanks Josh Nichols!)
310
463
 
311
464
  ### Bugfixes:
312
465
 
313
466
  * ActiveRecord scrubbing should generate strings instead of Document or
314
- DocumentFragment objects. GH #5
315
- * init.rb fixed to support installation as a Rails plugin. GH #6
467
+ DocumentFragment objects. GH [#5](https://github.com/flavorjones/loofah/issues/5)
468
+ * init.rb fixed to support installation as a Rails plugin. GH [#6](https://github.com/flavorjones/loofah/issues/6)
316
469
  (Thanks Josh Nichols!)
317
470
 
318
471
 
data/README.md CHANGED
@@ -1,15 +1,13 @@
1
1
  # Loofah
2
2
 
3
3
  * https://github.com/flavorjones/loofah
4
- * Docs: http://rubydoc.info/github/flavorjones/loofah/master/frames
4
+ * Docs: http://rubydoc.info/github/flavorjones/loofah/main/frames
5
5
  * Mailing list: [loofah-talk@googlegroups.com](https://groups.google.com/forum/#!forum/loofah-talk)
6
6
 
7
7
  ## Status
8
8
 
9
- |System|Status|
10
- |--|--|
11
- | Concourse CI | [![Concourse CI](https://ci.nokogiri.org/api/v1/teams/nokogiri-core/pipelines/loofah/jobs/ruby-2.5/badge)](https://ci.nokogiri.org/teams/nokogiri-core/pipelines/loofah?groups=master) |
12
- | Code Climate | [![Code Climate](https://codeclimate.com/github/flavorjones/loofah.svg)](https://codeclimate.com/github/flavorjones/loofah) |
9
+ [![ci](https://github.com/flavorjones/loofah/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/flavorjones/loofah/actions/workflows/ci.yml)
10
+ [![Tidelift dependencies](https://tidelift.com/badges/package/rubygems/loofah)](https://tidelift.com/subscription/pkg/rubygems-loofah?utm_source=rubygems-loofah&utm_medium=referral&utm_campaign=readme)
13
11
 
14
12
 
15
13
  ## Description
@@ -135,13 +133,12 @@ and `text` to return plain text:
135
133
  doc.text # => "ohai! div is safe "
136
134
  ```
137
135
 
138
- Also, `to_text` is available, which does the right thing with
139
- whitespace around block-level elements.
136
+ Also, `to_text` is available, which does the right thing with whitespace around block-level and line break elements.
140
137
 
141
138
  ``` ruby
142
- doc = Loofah.fragment("<h1>Title</h1><div>Content</div>")
143
- doc.text # => "TitleContent" # probably not what you want
144
- doc.to_text # => "\nTitle\n\nContent\n" # better
139
+ doc = Loofah.fragment("<h1>Title</h1><div>Content<br>Next line</div>")
140
+ doc.text # => "TitleContentNext line" # probably not what you want
141
+ doc.to_text # => "\nTitle\n\nContent\nNext line\n" # better
145
142
  ```
146
143
 
147
144
  ### Loofah::XML::Document and Loofah::XML::DocumentFragment
@@ -212,7 +209,7 @@ end
212
209
  Loofah.xml_document(File.read('plague.xml')).scrub!(bring_out_your_dead)
213
210
  ```
214
211
 
215
- === Built-In HTML Scrubbers
212
+ ### Built-In HTML Scrubbers
216
213
 
217
214
  Loofah comes with a set of sanitizing scrubbers that use HTML5lib's
218
215
  safelist algorithm:
@@ -301,6 +298,10 @@ And the mailing list is on Google Groups:
301
298
 
302
299
  And the IRC channel is \#loofah on freenode.
303
300
 
301
+ Consider subscribing to [Tidelift][tidelift] which provides license assurances and timely security notifications for your open source dependencies, including Loofah. [Tidelift][tidelift] subscriptions also help the Loofah maintainers fund our [automated testing](https://ci.nokogiri.org) which in turn allows us to ship releases, bugfixes, and security updates more often.
302
+
303
+ [tidelift]: https://tidelift.com/subscription/pkg/rubygems-loofah?utm_source=undefined&utm_medium=referral&utm_campaign=enterprise
304
+
304
305
 
305
306
  ## Security
306
307
 
@@ -347,7 +348,7 @@ And a big shout-out to Corey Innis for the name, and feedback on the API.
347
348
 
348
349
  ## Thank You
349
350
 
350
- The following people have generously donated via the [Pledgie](http://pledgie.com) badge on the [Loofah github page](https://github.com/flavorjones/loofah):
351
+ The following people have generously funded Loofah:
351
352
 
352
353
  * Bill Harding
353
354
 
data/lib/loofah/elements.rb CHANGED
@@ -1,91 +1,95 @@
1
- require 'set'
1
+ # frozen_string_literal: true
2
+ require "set"
2
3
 
3
4
  module Loofah
4
5
  module Elements
5
6
  STRICT_BLOCK_LEVEL_HTML4 = Set.new %w[
6
- address
7
- blockquote
8
- center
9
- dir
10
- div
11
- dl
12
- fieldset
13
- form
14
- h1
15
- h2
16
- h3
17
- h4
18
- h5
19
- h6
20
- hr
21
- isindex
22
- menu
23
- noframes
24
- noscript
25
- ol
26
- p
27
- pre
28
- table
29
- ul
30
- ]
7
+ address
8
+ blockquote
9
+ center
10
+ dir
11
+ div
12
+ dl
13
+ fieldset
14
+ form
15
+ h1
16
+ h2
17
+ h3
18
+ h4
19
+ h5
20
+ h6
21
+ hr
22
+ isindex
23
+ menu
24
+ noframes
25
+ noscript
26
+ ol
27
+ p
28
+ pre
29
+ table
30
+ ul
31
+ ]
31
32
 
32
33
  # https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
33
34
  STRICT_BLOCK_LEVEL_HTML5 = Set.new %w[
34
- address
35
- article
36
- aside
37
- blockquote
38
- canvas
39
- dd
40
- div
41
- dl
42
- dt
43
- fieldset
44
- figcaption
45
- figure
46
- footer
47
- form
48
- h1
49
- h2
50
- h3
51
- h4
52
- h5
53
- h6
54
- header
55
- hgroup
56
- hr
57
- li
58
- main
59
- nav
60
- noscript
61
- ol
62
- output
63
- p
64
- pre
65
- section
66
- table
67
- tfoot
68
- ul
69
- video
70
- ]
71
-
72
- STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
35
+ address
36
+ article
37
+ aside
38
+ blockquote
39
+ canvas
40
+ dd
41
+ div
42
+ dl
43
+ dt
44
+ fieldset
45
+ figcaption
46
+ figure
47
+ footer
48
+ form
49
+ h1
50
+ h2
51
+ h3
52
+ h4
53
+ h5
54
+ h6
55
+ header
56
+ hgroup
57
+ hr
58
+ li
59
+ main
60
+ nav
61
+ noscript
62
+ ol
63
+ output
64
+ p
65
+ pre
66
+ section
67
+ table
68
+ tfoot
69
+ ul
70
+ video
71
+ ]
73
72
 
74
73
  # The following elements may also be considered block-level
75
74
  # elements since they may contain block-level elements
76
75
  LOOSE_BLOCK_LEVEL = Set.new %w[dd
77
- dt
78
- frameset
79
- li
80
- tbody
81
- td
82
- tfoot
83
- th
84
- thead
85
- tr
86
- ]
76
+ dt
77
+ frameset
78
+ li
79
+ tbody
80
+ td
81
+ tfoot
82
+ th
83
+ thead
84
+ tr
85
+ ]
87
86
 
87
+ # Elements that aren't block but should generate a newline in #to_text
88
+ INLINE_LINE_BREAK = Set.new(["br"])
89
+
90
+ STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
88
91
  BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
92
+ LINEBREAKERS = BLOCK_LEVEL + INLINE_LINE_BREAK
89
93
  end
90
94
 
91
95
  ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants ::Loofah::Elements