loofah 2.2.3 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +124 -31
  3. data/README.md +12 -16
  4. data/lib/loofah.rb +35 -18
  5. data/lib/loofah/elements.rb +74 -73
  6. data/lib/loofah/helpers.rb +18 -7
  7. data/lib/loofah/html/document.rb +1 -0
  8. data/lib/loofah/html/document_fragment.rb +4 -2
  9. data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
  10. data/lib/loofah/html5/safelist.rb +819 -0
  11. data/lib/loofah/html5/scrub.rb +63 -46
  12. data/lib/loofah/instance_methods.rb +5 -3
  13. data/lib/loofah/metahelpers.rb +2 -1
  14. data/lib/loofah/scrubber.rb +8 -7
  15. data/lib/loofah/scrubbers.rb +12 -11
  16. data/lib/loofah/version.rb +5 -0
  17. data/lib/loofah/xml/document.rb +1 -0
  18. data/lib/loofah/xml/document_fragment.rb +2 -1
  19. metadata +40 -112
  20. data/.gemtest +0 -0
  21. data/Gemfile +0 -22
  22. data/Manifest.txt +0 -40
  23. data/Rakefile +0 -79
  24. data/benchmark/benchmark.rb +0 -149
  25. data/benchmark/fragment.html +0 -96
  26. data/benchmark/helper.rb +0 -73
  27. data/benchmark/www.slashdot.com.html +0 -2560
  28. data/lib/loofah/html5/whitelist.rb +0 -186
  29. data/test/assets/msword.html +0 -63
  30. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  31. data/test/helper.rb +0 -18
  32. data/test/html5/test_sanitizer.rb +0 -382
  33. data/test/integration/test_ad_hoc.rb +0 -204
  34. data/test/integration/test_helpers.rb +0 -43
  35. data/test/integration/test_html.rb +0 -72
  36. data/test/integration/test_scrubbers.rb +0 -400
  37. data/test/integration/test_xml.rb +0 -55
  38. data/test/unit/test_api.rb +0 -142
  39. data/test/unit/test_encoding.rb +0 -20
  40. data/test/unit/test_helpers.rb +0 -62
  41. data/test/unit/test_scrubber.rb +0 -229
  42. data/test/unit/test_scrubbers.rb +0 -14
data/.gemtest DELETED
File without changes
data/Gemfile DELETED
@@ -1,22 +0,0 @@
1
- # -*- ruby -*-
2
-
3
- # DO NOT EDIT THIS FILE. Instead, edit Rakefile, and run `rake bundler:gemfile`.
4
-
5
- source "https://rubygems.org/"
6
-
7
- gem "nokogiri", ">=1.5.9"
8
- gem "crass", "~>1.0.2"
9
-
10
- gem "rake", ">=0.8", :group => [:development, :test]
11
- gem "minitest", "~>2.2", :group => [:development, :test]
12
- gem "rr", "~>1.2.0", :group => [:development, :test]
13
- gem "json", ">=0", :group => [:development, :test]
14
- gem "hoe-gemspec", ">=0", :group => [:development, :test]
15
- gem "hoe-debugging", ">=0", :group => [:development, :test]
16
- gem "hoe-bundler", ">=0", :group => [:development, :test]
17
- gem "hoe-git", ">=0", :group => [:development, :test]
18
- gem "concourse", ">=0.15.0", :group => [:development, :test]
19
- gem "rdoc", "~>4.0", :group => [:development, :test]
20
- gem "hoe", "~>3.16", :group => [:development, :test]
21
-
22
- # vim: syntax=ruby
data/Manifest.txt DELETED
@@ -1,40 +0,0 @@
1
- .gemtest
2
- CHANGELOG.md
3
- Gemfile
4
- MIT-LICENSE.txt
5
- Manifest.txt
6
- README.md
7
- Rakefile
8
- SECURITY.md
9
- benchmark/benchmark.rb
10
- benchmark/fragment.html
11
- benchmark/helper.rb
12
- benchmark/www.slashdot.com.html
13
- lib/loofah.rb
14
- lib/loofah/elements.rb
15
- lib/loofah/helpers.rb
16
- lib/loofah/html/document.rb
17
- lib/loofah/html/document_fragment.rb
18
- lib/loofah/html5/libxml2_workarounds.rb
19
- lib/loofah/html5/scrub.rb
20
- lib/loofah/html5/whitelist.rb
21
- lib/loofah/instance_methods.rb
22
- lib/loofah/metahelpers.rb
23
- lib/loofah/scrubber.rb
24
- lib/loofah/scrubbers.rb
25
- lib/loofah/xml/document.rb
26
- lib/loofah/xml/document_fragment.rb
27
- test/assets/msword.html
28
- test/assets/testdata_sanitizer_tests1.dat
29
- test/helper.rb
30
- test/html5/test_sanitizer.rb
31
- test/integration/test_ad_hoc.rb
32
- test/integration/test_helpers.rb
33
- test/integration/test_html.rb
34
- test/integration/test_scrubbers.rb
35
- test/integration/test_xml.rb
36
- test/unit/test_api.rb
37
- test/unit/test_encoding.rb
38
- test/unit/test_helpers.rb
39
- test/unit/test_scrubber.rb
40
- test/unit/test_scrubbers.rb
data/Rakefile DELETED
@@ -1,79 +0,0 @@
1
- require 'rubygems'
2
- gem 'hoe', '>= 2.3.0'
3
- require 'hoe'
4
- require 'concourse'
5
-
6
- Hoe.plugin :git
7
- Hoe.plugin :gemspec
8
- Hoe.plugin :bundler
9
- Hoe.plugin :debugging
10
-
11
- Hoe.spec "loofah" do
12
- developer "Mike Dalessio", "mike.dalessio@gmail.com"
13
- developer "Bryan Helmkamp", "bryan@brynary.com"
14
-
15
- self.extra_rdoc_files = FileList["*.md"]
16
- self.history_file = "CHANGELOG.md"
17
- self.readme_file = "README.md"
18
- self.license "MIT"
19
-
20
- extra_deps << ["nokogiri", ">=1.5.9"]
21
- extra_deps << ["crass", "~> 1.0.2"]
22
-
23
- extra_dev_deps << ["rake", ">=0.8"]
24
- extra_dev_deps << ["minitest", "~>2.2"]
25
- extra_dev_deps << ["rr", "~>1.2.0"]
26
- extra_dev_deps << ["json", ">=0"]
27
- extra_dev_deps << ["hoe-gemspec", ">=0"]
28
- extra_dev_deps << ["hoe-debugging", ">=0"]
29
- extra_dev_deps << ["hoe-bundler", ">=0"]
30
- extra_dev_deps << ["hoe-git", ">=0"]
31
- extra_dev_deps << ["concourse", ">=0.15.0"]
32
- end
33
-
34
- task :gemspec do
35
- system %q(rake debug_gem | grep -v "^\(in " > loofah.gemspec)
36
- end
37
-
38
- task :redocs => :fix_css
39
- task :docs => :fix_css
40
- task :fix_css do
41
- better_css = <<-EOT
42
- .method-description pre {
43
- margin : 1em 0 ;
44
- }
45
-
46
- .method-description ul {
47
- padding : .5em 0 .5em 2em ;
48
- }
49
-
50
- .method-description p {
51
- margin-top : .5em ;
52
- }
53
-
54
- #main ul, div#documentation ul {
55
- list-style-type : disc ! IMPORTANT ;
56
- list-style-position : inside ! IMPORTANT ;
57
- }
58
-
59
- h2 + ul {
60
- margin-top : 1em;
61
- }
62
- EOT
63
- puts "* fixing css"
64
- File.open("doc/rdoc.css", "a") { |f| f.write better_css }
65
- end
66
-
67
- desc "generate and upload docs to rubyforge"
68
- task :doc_upload_to_rubyforge => :docs do
69
- Dir.chdir "doc" do
70
- system "rsync -avz --delete * rubyforge.org:/var/www/gforge-projects/loofah/loofah"
71
- end
72
- end
73
-
74
- desc "generate whitelists from W3C specifications"
75
- task :generate_whitelists do
76
- load "tasks/generate-whitelists"
77
- end
78
-
79
- Concourse.new("loofah").create_tasks!
data/benchmark/benchmark.rb DELETED
@@ -1,149 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require "#{File.dirname(__FILE__)}/helper.rb"
3
-
4
- def compare_scrub_methods
5
- snip = "<div>foo</div><foo>fuxx <b>quux</b></foo><script>i have a chair</script>"
6
- puts "starting with:\n#{snip}"
7
- puts
8
- puts RailsSanitize.new.sanitize(snip) # => Rails.sanitize / scrub!(:prune).to_s
9
- puts Loofah::Helpers.sanitize(snip)
10
- puts "--"
11
- puts RailsSanitize.new.strip_tags(snip) # => Rails.strip_tags / parse().text
12
- puts Loofah::Helpers.strip_tags(snip)
13
- puts "--"
14
- puts Sanitize.clean(snip, Sanitize::Config::RELAXED) # => scrub!(:strip).to_s
15
- puts Loofah.scrub_fragment(snip, :strip).to_s
16
- puts "--"
17
- puts HTML5libSanitize.new.sanitize(snip) # => scrub!(:escape).to_s
18
- puts Loofah.scrub_fragment(snip, :escape).to_s
19
- puts "--"
20
- puts HTMLFilter.new.filter(snip)
21
- puts Loofah.scrub_fragment(snip, :strip).to_s
22
- puts
23
- end
24
-
25
- module TestSet
26
- def test_set options={}
27
- scale = options[:rehearse] ? 10 : 1
28
- puts self.class.name
29
-
30
- n = 100 / scale
31
- puts " Large document, #{BIG_FILE.length} bytes (x#{n})"
32
- bench BIG_FILE, n, false
33
- puts
34
-
35
- n = 1000 / scale
36
- puts " Small fragment, #{FRAGMENT.length} bytes (x#{n})"
37
- bench FRAGMENT, n, true
38
- puts
39
-
40
- n = 10_000 / scale
41
- puts " Text snippet, #{SNIPPET.length} bytes (x#{n})"
42
- bench SNIPPET, n, true
43
- puts
44
- end
45
- end
46
-
47
- class HeadToHead < Measure
48
- end
49
-
50
- class HeadToHeadRailsSanitize < Measure
51
- include TestSet
52
- def bench(content, ntimes, fragment_p)
53
- clear_measure
54
-
55
- measure "Loofah::Helpers.sanitize", ntimes do
56
- Loofah::Helpers.sanitize content
57
- end
58
-
59
- sanitizer = RailsSanitize.new
60
- measure "ActionView sanitize", ntimes do
61
- sanitizer.sanitize(content)
62
- end
63
- end
64
- end
65
-
66
- class HeadToHeadRailsStripTags < Measure
67
- include TestSet
68
- def bench(content, ntimes, fragment_p)
69
- clear_measure
70
-
71
- measure "Loofah::Helpers.strip_tags", ntimes do
72
- Loofah::Helpers.strip_tags content
73
- end
74
-
75
- sanitizer = RailsSanitize.new
76
- measure "ActionView strip_tags", ntimes do
77
- sanitizer.strip_tags(content)
78
- end
79
- end
80
- end
81
-
82
- class HeadToHeadSanitizerSanitize < Measure
83
- include TestSet
84
- def bench(content, ntimes, fragment_p)
85
- clear_measure
86
-
87
- measure "Loofah :strip", ntimes do
88
- if fragment_p
89
- Loofah.scrub_fragment(content, :strip).to_s
90
- else
91
- Loofah.scrub_document(content, :strip).to_s
92
- end
93
- end
94
-
95
- measure "Sanitize.clean", ntimes do
96
- Sanitize.clean(content, Sanitize::Config::RELAXED)
97
- end
98
- end
99
- end
100
-
101
- class HeadToHeadHtml5LibSanitize < Measure
102
- include TestSet
103
- def bench(content, ntimes, fragment_p)
104
- clear_measure
105
-
106
- measure "Loofah :escape", ntimes do
107
- if fragment_p
108
- Loofah.scrub_fragment(content, :escape).to_s
109
- else
110
- Loofah.scrub_document(content, :escape).to_s
111
- end
112
- end
113
-
114
- html5_sanitizer = HTML5libSanitize.new
115
- measure "HTML5lib.sanitize", ntimes do
116
- html5_sanitizer.sanitize(content)
117
- end
118
- end
119
- end
120
-
121
- class HeadToHeadHTMLFilter < Measure
122
- include TestSet
123
- def bench(content, ntimes, fragment_p)
124
- clear_measure
125
-
126
- measure "Loofah::Helpers.sanitize", ntimes do
127
- Loofah::Helpers.sanitize content
128
- end
129
-
130
- sanitizer = HTMLFilter.new
131
- measure "HTMLFilter.filter", ntimes do
132
- sanitizer.filter(content)
133
- end
134
- end
135
- end
136
-
137
- puts "Nokogiri version: #{Nokogiri::VERSION_INFO.inspect}"
138
- puts "Loofah version: #{Loofah::VERSION.inspect}"
139
-
140
- benches = []
141
- benches << HeadToHeadRailsSanitize.new
142
- benches << HeadToHeadRailsStripTags.new
143
- benches << HeadToHeadSanitizerSanitize.new
144
- benches << HeadToHeadHtml5LibSanitize.new
145
- benches << HeadToHeadHTMLFilter.new
146
- puts "---------- rehearsal ----------"
147
- benches.each { |bench| bench.test_set :rehearse => true }
148
- puts "---------- realsies ----------"
149
- benches.each { |bench| bench.test_set }
data/benchmark/fragment.html DELETED
@@ -1,96 +0,0 @@
1
- <div id="top_parent"></div>
2
-
3
- <div id="jump">
4
- <a href="#main-articles">Stories</a>
5
- <br>
6
- <a href="#blocks">Slash Boxes</a>
7
- <br>
8
- <a href="#comments">Comments</a>
9
- </div>
10
- <a name="topothepage"></a>
11
- <div id="doc3" class="yui-t6 index2 mainpage ac ">
12
- <div id="hd" >
13
- <div id="logo" >
14
-
15
-
16
-
17
- <h1><a href="//slashdot.org"><span>Slashdot</span></a></h1>
18
- <div id="slogan"><h2>News for nerds, stuff that matters</h2></div>
19
- </div>
20
- <a href="#articles" class="hidden">Jump to articles</a>
21
- <div class="nav">
22
- <ul>
23
-
24
-
25
-
26
- <li><a href="//slashdot.org/submit.pl" title="Submit a story to Slashdot">Submit Story</a></li>
27
- <li><a href="//slashdot.org/help" title="Frequently asked questions on Slashdot">Help</a></li>
28
- <li><a href="//slashdot.org/login.pl" onclick="show_login_box(); return false;">Log In</a></li>
29
-
30
- </ul>
31
- </div>
32
-
33
-
34
-
35
-
36
-
37
- <div id="fh_picker_search" style="display: block;">
38
- <form method="get" action="//slashdot.org/index2.pl">
39
- <fieldset class="mode-filter mode-anon">
40
- <legend>Search</legend>
41
-
42
-
43
- <input class="query" type="text" name="fhfilter" value="" id="searchquery"> <input type="button" class="setfhfilter" value="Filter" id="viewsearch" style="display:none"> <input type="submit" class="setsearchfilter" value="Search" id="fhsearch" style="display:none">
44
- <noscript><input type="submit" class="setsearchfilter" value="Search"></noscript>
45
-
46
- <script type="text/javascript">
47
- var slash_search;
48
- $(function(){
49
- if (has_hose()) {
50
- var $search_text = $any('searchquery'),
51
- $panel = $search_text.closest('fieldset');
52
- $search_buttons = $('#viewsearch,#fhsearch'),
53
- ws = /\s+/;
54
-
55
-
56
-
57
- // The search buttons set the firehose option named by their class.
58
- $search_buttons.
59
- click(function(){
60
- var which=this.className;
61
- $search_text.each(function(){
62
- firehose_set_options(which, this.value);
63
- });
64
- return false;
65
- });
66
-
67
- // Provide a globally available function that does whatever clicking the search button would do.
68
- slash_search = function( query ){
69
- query!==undefined && $search_text.val(query);
70
- $search_buttons.filter(':visible:first').click();
71
- };
72
-
73
- $search_text.
74
- keydown(function( e ){ // ESCAPE restores the filter in-effect.
75
- if ( e.which == $.ui.keyCode.ESCAPE ) {
76
- $search_text.val(firehose_settings.fhfilter||'');
77
- return true;
78
- }
79
- if ( e.which == $.ui.keyCode.ENTER ) {
80
- slash_search();
81
- return false;
82
- }
83
- });
84
-
85
- $(document).
86
- bind('firehose-setting-setfhfilter firehose-setting-setsearchfilter', function( e, new_query ){
87
- $('fieldset input[type=text]').each(function(){
88
- $(this).blur().val(new_query);
89
- });
90
- }).
91
- bind('set-options.firehose', function( e, data ){
92
- data.select_section && $panel.toggleClass('mode-filter', data.id!=='unsaved');
93
- });
94
- }
95
- });
96
- </script>
data/benchmark/helper.rb DELETED
@@ -1,73 +0,0 @@
1
- require 'rubygems'
2
- require 'open-uri'
3
- require 'hpricot'
4
- require File.expand_path(File.dirname(__FILE__) + "/../lib/loofah")
5
- require 'benchmark'
6
- require "action_view"
7
- require "action_controller/vendor/html-scanner"
8
- require "sanitize"
9
- require 'hitimes'
10
- require 'htmlfilter'
11
-
12
- unless defined?(HTMLFilter)
13
- HTMLFilter = HtmlFilter
14
- end
15
-
16
- class RailsSanitize
17
- include ActionView::Helpers::SanitizeHelper
18
- extend ActionView::Helpers::SanitizeHelper::ClassMethods
19
- end
20
-
21
- class HTML5libSanitize
22
- require 'html5/html5parser'
23
- require 'html5/liberalxmlparser'
24
- require 'html5/treewalkers'
25
- require 'html5/treebuilders'
26
- require 'html5/serializer'
27
- require 'html5/sanitizer'
28
-
29
- include HTML5
30
-
31
- def sanitize(html)
32
- HTMLParser.parse_fragment(html, {
33
- :tokenizer => HTMLSanitizer,
34
- :encoding => 'utf-8',
35
- :tree => TreeBuilders::REXML::TreeBuilder
36
- }).to_s
37
- end
38
- end
39
-
40
- BIG_FILE = File.read(File.join(File.dirname(__FILE__), "www.slashdot.com.html"))
41
- FRAGMENT = File.read(File.join(File.dirname(__FILE__), "fragment.html"))
42
- SNIPPET = "This is typical form field input in <b>length and content."
43
-
44
- class Measure
45
- def initialize
46
- clear_measure
47
- end
48
-
49
- def clear_measure
50
- @first_time = true
51
- @baseline = nil
52
- end
53
-
54
- def measure(name, ntimes)
55
- if @first_time
56
- printf " %-30s %7s %8s %5s\n", "", "total", "single", "rel"
57
- @first_time = false
58
- end
59
- timer = Hitimes::TimedMetric.new(name)
60
- timer.start
61
- ntimes.times do |j|
62
- yield
63
- end
64
- timer.stop
65
- if @baseline
66
- printf " %30s %7.3f (%8.6f) %5.2fx\n", timer.name, timer.sum, timer.sum / ntimes, timer.sum / @baseline
67
- else
68
- @baseline = timer.sum
69
- printf " %30s %7.3f (%8.6f) %5s\n", timer.name, timer.sum, timer.sum / ntimes, "-"
70
- end
71
- timer.sum
72
- end
73
- end