hpricot_scrub 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -3,6 +3,7 @@ README.txt
3
3
  CHANGELOG.txt
4
4
  Manifest.txt
5
5
  setup.rb
6
+ lib/hpricot_scrub/hpricot_scrub.rb
6
7
  lib/hpricot_scrub/version.rb
7
8
  lib/hpricot_scrub.rb
8
9
  test/test_helper.rb
@@ -0,0 +1,94 @@
1
+ require 'rubygems'
2
+
3
+ if defined?(Kernel::gem)
4
+ gem('hpricot', '>= 0.5')
5
+ else
6
+ require_gem('hpricot', '>= 0.5')
7
+ end
8
+
9
+ require 'hpricot'
10
+
11
+ module Hpricot
12
+ module Scrubable
13
+ def scrubable?
14
+ ! [Hpricot::Text, Hpricot::BogusETag].include?(self.class)
15
+ end
16
+ end
17
+
18
+ class Elements
19
+ def strip
20
+ each { |x| x.strip }
21
+ end
22
+
23
+ def strip_attributes(safe=[])
24
+ each { |x| x.strip_attributes(safe) }
25
+ end
26
+ end
27
+
28
+ class BaseEle
29
+ include Scrubable
30
+ end
31
+
32
+ class Elem
33
+ include Scrubable
34
+
35
+ def scrub(config)
36
+ children.reverse.each { |c|
37
+ c.scrub(config) if c.scrubable? && ! config[:allow_tags].include?(c.name)
38
+ }
39
+ strip unless config[:allow_tags].include?(name)
40
+ end
41
+
42
+ def remove
43
+ parent.children.delete(self)
44
+ end
45
+
46
+ def strip
47
+ children.each { |c| c.strip if c.scrubable? }
48
+
49
+ if strip_removes?
50
+ remove
51
+ else
52
+ parent.replace_child self, Hpricot.make(inner_html) unless parent.nil?
53
+ end
54
+ end
55
+
56
+ def strip_attributes(safe=[])
57
+ attributes.each {|atr|
58
+ remove_attribute(atr[0]) unless safe.include?(atr[0])
59
+ } unless attributes.nil?
60
+ end
61
+
62
+ def strip_removes?
63
+ # I'm sure there are others that should be ripped instead of stripped
64
+ attributes && attributes['type'] =~ /script|css/
65
+ end
66
+ end
67
+
68
+ class Doc
69
+ def scrub(config={})
70
+ config = {
71
+ :remove_tags => [],
72
+ :allow_tags => [],
73
+ :allow_attributes => []
74
+ }.merge(config)
75
+
76
+ config[:remove_tags].each { |tag| (self/tag).remove }
77
+ config[:allow_tags].each { |tag|
78
+ (self/tag).strip_attributes(config[:allow_attributes])
79
+ }
80
+ children.reverse.each {|c| c.scrub(config) if c.scrubable? }
81
+ self
82
+ end
83
+ end
84
+ end
85
+
86
+ class String
87
+ def scrub!
88
+ self.gsub!(/^(\n|.)*$/, Hpricot(self).scrub.inner_html)
89
+ end
90
+
91
+ def scrub
92
+ dup.scrub!
93
+ end
94
+ end
@@ -2,7 +2,7 @@ module HpricotScrub #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 2
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/lib/hpricot_scrub.rb CHANGED
@@ -1 +1,4 @@
1
- Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
1
+ # Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
2
+
3
+ require File.dirname(__FILE__) + '/hpricot_scrub/hpricot_scrub.rb'
4
+ require File.dirname(__FILE__) + '/hpricot_scrub/version.rb'
metadata CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.1
3
3
  specification_version: 1
4
4
  name: hpricot_scrub
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.2.0
6
+ version: 0.2.1
7
7
  date: 2007-03-04 00:00:00 -08:00
8
8
  summary: Scrub HTML with Hpricot
9
9
  require_paths:
@@ -34,6 +34,7 @@ files:
34
34
  - CHANGELOG.txt
35
35
  - Manifest.txt
36
36
  - setup.rb
37
+ - lib/hpricot_scrub/hpricot_scrub.rb
37
38
  - lib/hpricot_scrub/version.rb
38
39
  - lib/hpricot_scrub.rb
39
40
  - test/test_helper.rb