hpricot_scrub 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +1 -0
- data/lib/hpricot_scrub/hpricot_scrub.rb +94 -0
- data/lib/hpricot_scrub/version.rb +1 -1
- data/lib/hpricot_scrub.rb +4 -1
- metadata +2 -1
data/Manifest.txt
CHANGED
data/lib/hpricot_scrub/hpricot_scrub.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
if defined?(Kernel::gem)
|
4
|
+
gem('hpricot', '>= 0.5')
|
5
|
+
else
|
6
|
+
require_gem('hpricot', '>= 0.5')
|
7
|
+
end
|
8
|
+
|
9
|
+
require 'hpricot'
|
10
|
+
|
11
|
+
module Hpricot
|
12
|
+
module Scrubable
|
13
|
+
def scrubable?
|
14
|
+
! [Hpricot::Text, Hpricot::BogusETag].include?(self.class)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class Elements
|
19
|
+
def strip
|
20
|
+
each { |x| x.strip }
|
21
|
+
end
|
22
|
+
|
23
|
+
def strip_attributes(safe=[])
|
24
|
+
each { |x| x.strip_attributes(safe) }
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
class BaseEle
|
29
|
+
include Scrubable
|
30
|
+
end
|
31
|
+
|
32
|
+
class Elem
|
33
|
+
include Scrubable
|
34
|
+
|
35
|
+
def scrub(config)
|
36
|
+
children.reverse.each { |c|
|
37
|
+
c.scrub(config) if c.scrubable? && ! config[:allow_tags].include?(c.name)
|
38
|
+
}
|
39
|
+
strip unless config[:allow_tags].include?(name)
|
40
|
+
end
|
41
|
+
|
42
|
+
def remove
|
43
|
+
parent.children.delete(self)
|
44
|
+
end
|
45
|
+
|
46
|
+
def strip
|
47
|
+
children.each { |c| c.strip if c.scrubable? }
|
48
|
+
|
49
|
+
if strip_removes?
|
50
|
+
remove
|
51
|
+
else
|
52
|
+
parent.replace_child self, Hpricot.make(inner_html) unless parent.nil?
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def strip_attributes(safe=[])
|
57
|
+
attributes.each {|atr|
|
58
|
+
remove_attribute(atr[0]) unless safe.include?(atr[0])
|
59
|
+
} unless attributes.nil?
|
60
|
+
end
|
61
|
+
|
62
|
+
def strip_removes?
|
63
|
+
# I'm sure there are others that should be ripped instead of stripped
|
64
|
+
attributes && attributes['type'] =~ /script|css/
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
class Doc
|
69
|
+
def scrub(config={})
|
70
|
+
config = {
|
71
|
+
:remove_tags => [],
|
72
|
+
:allow_tags => [],
|
73
|
+
:allow_attributes => []
|
74
|
+
}.merge(config)
|
75
|
+
|
76
|
+
config[:remove_tags].each { |tag| (self/tag).remove }
|
77
|
+
config[:allow_tags].each { |tag|
|
78
|
+
(self/tag).strip_attributes(config[:allow_attributes])
|
79
|
+
}
|
80
|
+
children.reverse.each {|c| c.scrub(config) if c.scrubable? }
|
81
|
+
self
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
class String
|
87
|
+
def scrub!
|
88
|
+
self.gsub!(/^(\n|.)*$/, Hpricot(self).scrub.inner_html)
|
89
|
+
end
|
90
|
+
|
91
|
+
def scrub
|
92
|
+
dup.scrub!
|
93
|
+
end
|
94
|
+
end
|
data/lib/hpricot_scrub.rb
CHANGED
@@ -1 +1,4 @@
|
|
1
|
-
Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
|
1
|
+
# Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
|
2
|
+
|
3
|
+
require File.dirname(__FILE__) + '/hpricot_scrub/hpricot_scrub.rb'
|
4
|
+
require File.dirname(__FILE__) + '/hpricot_scrub/version.rb'
|
metadata
CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.1
|
|
3
3
|
specification_version: 1
|
4
4
|
name: hpricot_scrub
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.2.0
|
6
|
+
version: 0.2.1
|
7
7
|
date: 2007-03-04 00:00:00 -08:00
|
8
8
|
summary: Scrub HTML with Hpricot
|
9
9
|
require_paths:
|
@@ -34,6 +34,7 @@ files:
|
|
34
34
|
- CHANGELOG.txt
|
35
35
|
- Manifest.txt
|
36
36
|
- setup.rb
|
37
|
+
- lib/hpricot_scrub/hpricot_scrub.rb
|
37
38
|
- lib/hpricot_scrub/version.rb
|
38
39
|
- lib/hpricot_scrub.rb
|
39
40
|
- test/test_helper.rb
|