hpricot_scrub 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +1 -0
- data/lib/hpricot_scrub/hpricot_scrub.rb +94 -0
- data/lib/hpricot_scrub/version.rb +1 -1
- data/lib/hpricot_scrub.rb +4 -1
- metadata +2 -1
data/Manifest.txt
CHANGED
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
if defined?(Kernel::gem)
|
4
|
+
gem('hpricot', '>= 0.5')
|
5
|
+
else
|
6
|
+
require_gem('hpricot', '>= 0.5')
|
7
|
+
end
|
8
|
+
|
9
|
+
require 'hpricot'
|
10
|
+
|
11
|
+
module Hpricot
  # Mixin that lets the scrubber ask a node whether it should be visited.
  module Scrubable
    # Returns false when the receiver's class is exactly Hpricot::Text or
    # Hpricot::BogusETag, and true for every other class.
    def scrubable?
      ![Hpricot::Text, Hpricot::BogusETag].include?(self.class)
    end
  end

  class Elements
    # Calls +strip+ on every member of the collection.
    def strip
      each { |x| x.strip }
    end

    # Calls +strip_attributes(safe)+ on every member of the collection.
    #
    # safe - Array of attribute names to keep (default: keep none).
    def strip_attributes(safe = [])
      each { |x| x.strip_attributes(safe) }
    end
  end

  class BaseEle
    include Scrubable
  end

  class Elem
    include Scrubable

    # Scrubs the subtree rooted at this element.
    #
    # config - Hash that must provide :allow_tags (a list of tag names).
    #
    # Every scrubable child is scrubbed first; afterwards this element is
    # stripped unless its own name appears in config[:allow_tags].
    def scrub(config)
      # Allowed children are descended into as well: being allowed only spares
      # the child itself from #strip (decided at the end of its own #scrub),
      # it must not hide disallowed tags nested further down.
      # `reverse` hands back a copy, so children may replace or remove
      # themselves in the live list while we walk it.
      # NOTE(review): the `|| []` assumes children can be nil for empty
      # elements in some Hpricot versions -- confirm.
      (children || []).reverse.each { |c| c.scrub(config) if c.scrubable? }
      strip unless config[:allow_tags].include?(name)
    end

    # Deletes this element from its parent's children list.
    # Does nothing when the element has no parent.
    def remove
      parent.children.delete(self) unless parent.nil?
    end

    # Strips this element: scrubable children are stripped first, then the
    # element is either removed outright (see #strip_removes?) or replaced in
    # its parent by the nodes built from its inner HTML. An element without a
    # parent is left in place once its children have been stripped.
    def strip
      # Walk a snapshot: each child's #strip deletes or replaces that child in
      # this very list, and mutating an Array during #each skips siblings.
      (children || []).dup.each { |c| c.strip if c.scrubable? }

      if strip_removes?
        remove
      else
        parent.replace_child self, Hpricot.make(inner_html) unless parent.nil?
      end
    end

    # Removes every attribute whose name is not listed in +safe+.
    #
    # safe - Array of attribute names to keep (default: keep none).
    def strip_attributes(safe = [])
      # Iterate over a snapshot of the names so that removing an attribute
      # cannot disturb the iteration.
      attributes.keys.each { |key|
        remove_attribute(key) unless safe.include?(key)
      } unless attributes.nil?
    end

    # Truthy when stripping should delete the element together with its
    # content instead of unwrapping it: that is, when its `type` attribute
    # matches /script|css/.
    def strip_removes?
      # I'm sure there are others that should be ripped instead of stripped
      attributes && attributes['type'] =~ /script|css/
    end
  end

  class Doc
    # Scrubs the whole document in place and returns the document.
    #
    # config - Hash of options, each defaulting to an empty list:
    #          :remove_tags      - selectors whose matches are removed
    #          :allow_tags       - tag names that are left in the document
    #          :allow_attributes - attribute names kept on allowed tags
    def scrub(config = {})
      config = {
        :remove_tags => [],
        :allow_tags => [],
        :allow_attributes => []
      }.merge(config || {})

      config[:remove_tags].each { |tag| (self / tag).remove }
      config[:allow_tags].each { |tag|
        (self / tag).strip_attributes(config[:allow_attributes])
      }
      # `reverse` yields a copy, so children can restructure the live list.
      (children || []).reverse.each { |c| c.scrub(config) if c.scrubable? }
      self
    end
  end
end
|
85
|
+
|
86
|
+
class String
  # Replaces the receiver's contents, in place, with the scrubbed version of
  # the HTML it holds, and returns the receiver.
  #
  # config - optional Hash handed to the parsed document's #scrub
  #          (default: {}).
  #
  # NOTE(review): Ruby 2.1+ ships core String#scrub / #scrub! (invalid-byte
  # repair); these definitions shadow them -- confirm that is acceptable.
  def scrub!(config = {})
    # Use #replace rather than gsub!: gsub! interprets backslash sequences
    # (\1, \0, \\) in the replacement text, corrupting HTML that contains them.
    replace(Hpricot(self).scrub(config).inner_html)
  end

  # Returns a scrubbed copy of the receiver, leaving the receiver untouched.
  #
  # config - optional Hash handed through to #scrub! (default: {}).
  def scrub(config = {})
    dup.scrub!(config)
  end
end
|
data/lib/hpricot_scrub.rb
CHANGED
@@ -1 +1,4 @@
|
|
1
|
-
Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
|
1
|
+
# Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
|
2
|
+
|
3
|
+
require File.dirname(__FILE__) + '/hpricot_scrub/hpricot_scrub.rb'
|
4
|
+
require File.dirname(__FILE__) + '/hpricot_scrub/version.rb'
|
metadata
CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.1
|
|
3
3
|
specification_version: 1
|
4
4
|
name: hpricot_scrub
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.0
|
6
|
+
version: 0.2.1
|
7
7
|
date: 2007-03-04 00:00:00 -08:00
|
8
8
|
summary: Scrub HTML with Hpricot
|
9
9
|
require_paths:
|
@@ -34,6 +34,7 @@ files:
|
|
34
34
|
- CHANGELOG.txt
|
35
35
|
- Manifest.txt
|
36
36
|
- setup.rb
|
37
|
+
- lib/hpricot_scrub/hpricot_scrub.rb
|
37
38
|
- lib/hpricot_scrub/version.rb
|
38
39
|
- lib/hpricot_scrub.rb
|
39
40
|
- test/test_helper.rb
|