hpricot_scrub 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -3,6 +3,7 @@ README.txt
3
3
  CHANGELOG.txt
4
4
  Manifest.txt
5
5
  setup.rb
6
+ lib/hpricot_scrub/hpricot_scrub.rb
6
7
  lib/hpricot_scrub/version.rb
7
8
  lib/hpricot_scrub.rb
8
9
  test/test_helper.rb
@@ -0,0 +1,94 @@
1
+ require 'rubygems'
2
+
3
+ if defined?(Kernel::gem)
4
+ gem('hpricot', '>= 0.5')
5
+ else
6
+ require_gem('hpricot', '>= 0.5')
7
+ end
8
+
9
+ require 'hpricot'
10
+
11
module Hpricot
  # Mixin that lets the scrubber ask a node whether it should be visited.
  module Scrubable
    # Returns false when the receiver's class is exactly Hpricot::Text or
    # Hpricot::BogusETag, and true for any other class.
    def scrubable?
      ![Hpricot::Text, Hpricot::BogusETag].include?(self.class)
    end
  end

  class Elements
    # Calls #strip on every member of the collection.
    def strip
      each { |x| x.strip }
    end

    # Calls #strip_attributes on every member of the collection.
    #
    # safe - attribute names that must be kept (default: none).
    def strip_attributes(safe=[])
      each { |x| x.strip_attributes(safe) }
    end
  end

  class BaseEle
    include Scrubable
  end

  class Elem
    include Scrubable

    # Scrubs the subtree below this element, then strips the element itself
    # unless its tag name is listed in config[:allow_tags].
    #
    # config - Hash; only config[:allow_tags] (a list of tag names) is read.
    #
    # Every scrubable child is visited, including children whose own tag is
    # allowed: an allowed child is kept by its own #scrub call, but anything
    # disallowed nested inside it still has to be scrubbed.
    #
    # NOTE(review): #strip unwraps every descendant as well, so allowed tags
    # nested inside a disallowed tag are unwrapped too - confirm that is the
    # intended behaviour.
    def scrub(config)
      # Walk a reversed copy: scrubbing a child may delete it from, or replace
      # it in, the live children array.
      children.reverse.each { |c| c.scrub(config) if c.scrubable? }
      strip unless config[:allow_tags].include?(name)
    end

    # Deletes this element from its parent's children.
    # Does nothing (and returns nil) when the element has no parent.
    def remove
      parent.children.delete(self) unless parent.nil?
    end

    # Strips every scrubable child, then either removes this element entirely
    # (see #strip_removes?) or replaces it in its parent with the nodes built
    # from its inner HTML. An element without a parent is left in place.
    def strip
      # Iterate over a snapshot: a child's #strip deletes or replaces that
      # child in this very array, and mutating the array mid-iteration would
      # make the loop skip the following sibling.
      children.dup.each { |c| c.strip if c.scrubable? }

      if strip_removes?
        remove
      else
        parent.replace_child(self, Hpricot.make(inner_html)) unless parent.nil?
      end
    end

    # Removes every attribute whose name is not listed in +safe+.
    # Returns nil when the element has no attributes.
    #
    # safe - attribute names that must be kept (default: none).
    def strip_attributes(safe=[])
      return if attributes.nil?

      attributes.each { |atr|
        # atr[0] is the attribute name.
        remove_attribute(atr[0]) unless safe.include?(atr[0])
      }
    end

    # Truthy when stripping should drop the element together with its content
    # instead of keeping the inner HTML: that is, when the element has a
    # 'type' attribute matching /script|css/.
    def strip_removes?
      # I'm sure there are others that should be ripped instead of stripped
      attributes && attributes['type'] =~ /script|css/
    end
  end

  class Doc
    # Scrubs the whole document in place and returns the document.
    #
    # config - Hash of options, merged over these defaults:
    #          :remove_tags      - selectors whose matches are removed
    #          :allow_tags       - tag names that are kept
    #          :allow_attributes - attribute names kept on allowed tags
    def scrub(config={})
      config = {
        :remove_tags      => [],
        :allow_tags       => [],
        :allow_attributes => []
      }.merge(config)

      config[:remove_tags].each { |tag| (self/tag).remove }
      config[:allow_tags].each { |tag|
        (self/tag).strip_attributes(config[:allow_attributes])
      }
      # Reversed copy: scrubbing a child may alter the live children array.
      children.reverse.each { |c| c.scrub(config) if c.scrubable? }
      self
    end
  end
end
85
+
86
class String
  # Replaces the receiver's content with its scrubbed HTML (Hpricot parse,
  # Doc#scrub with the default configuration, then inner_html) and returns
  # the receiver.
  #
  # String#replace is used instead of gsub! so the scrubbed HTML is stored
  # verbatim: gsub! treats its second argument as a replacement pattern, so
  # sequences such as \1, \0 or \\ in the HTML would be expanded or collapsed.
  #
  # NOTE(review): Ruby 2.1+ ships its own String#scrub / String#scrub! (for
  # invalid byte sequences); these definitions override them - confirm that is
  # acceptable on the Ruby versions this library targets.
  def scrub!
    replace(Hpricot(self).scrub.inner_html)
  end

  # Returns a scrubbed copy of the receiver; the receiver is left untouched.
  def scrub
    dup.scrub!
  end
end
@@ -2,7 +2,7 @@ module HpricotScrub #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 2
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/lib/hpricot_scrub.rb CHANGED
@@ -1 +1,4 @@
1
- Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
1
+ # Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
2
+
3
+ require File.dirname(__FILE__) + '/hpricot_scrub/hpricot_scrub.rb'
4
+ require File.dirname(__FILE__) + '/hpricot_scrub/version.rb'
metadata CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.1
3
3
  specification_version: 1
4
4
  name: hpricot_scrub
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.2.0
6
+ version: 0.2.1
7
7
  date: 2007-03-04 00:00:00 -08:00
8
8
  summary: Scrub HTML with Hpricot
9
9
  require_paths:
@@ -34,6 +34,7 @@ files:
34
34
  - CHANGELOG.txt
35
35
  - Manifest.txt
36
36
  - setup.rb
37
+ - lib/hpricot_scrub/hpricot_scrub.rb
37
38
  - lib/hpricot_scrub/version.rb
38
39
  - lib/hpricot_scrub.rb
39
40
  - test/test_helper.rb