suung-undress 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/CHANGELOG +23 -0
- data/LICENSE +22 -0
- data/README.rdoc +42 -0
- data/Rakefile +32 -0
- data/lib/core_ext/object.rb +6 -0
- data/lib/hpricot_ext.rb +88 -0
- data/lib/undress.rb +118 -0
- data/lib/undress/grammar.rb +188 -0
- data/lib/undress/greencloth.rb +142 -0
- data/lib/undress/textile.rb +126 -0
- data/test/test_grammar.rb +75 -0
- data/test/test_greencloth.rb +435 -0
- data/test/test_helper.rb +11 -0
- data/test/test_hpricot_ext.rb +57 -0
- data/test/test_textile.rb +313 -0
- data/undress.gemspec +45 -0
- metadata +125 -0
data/.gitignore
ADDED
data/CHANGELOG
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
* Fix spaces and <br> inside table cells
|
2
|
+
|
3
|
+
0.2.3
|
4
|
+
|
5
|
+
* Add Hpricot style extension
|
6
|
+
* Move hpricot extensions from undress.rb to a single file
|
7
|
+
* Convert to Textile single formatted letters inside a word
|
8
|
+
|
9
|
+
0.2.2
|
10
|
+
|
11
|
+
* Adding a \n after [[toc]] in greencloth parser
|
12
|
+
|
13
|
+
0.2.1
|
14
|
+
|
15
|
+
* <span> with more than one styling bold|italic|underline|line-through
|
16
|
+
* <p> styling Italics, Underlines, Line-throughs and Bold are converted.
|
17
|
+
|
18
|
+
0.2
|
19
|
+
|
20
|
+
* Fix on removing new lines, tabs and spaces on element and tag nodes.
|
21
|
+
* xhtmlize! method added with:
|
22
|
+
** Invalid nested <ul> and <ol> lists are converted.
|
23
|
+
** <span> styling Italics, Underlines, Line-throughs and Bold are converted.
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
(The MIT License)
|
2
|
+
|
3
|
+
Copyright (c) 2009 Nicolas Sanguinetti, entp.com
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
= Undress
|
2
|
+
|
3
|
+
Easily convert HTML back to Textile or Greencloth.
|
4
|
+
|
5
|
+
require "undress/textile"
|
6
|
+
|
7
|
+
code =<<html
|
8
|
+
<h1>Hello world!</h1>
|
9
|
+
<p><strong>Hey!</strong> How is it going?</p>
|
10
|
+
<h2>Supported Markup Languages so far:</h2>
|
11
|
+
<ul>
|
12
|
+
<li>Textile</li>
|
13
|
+
<li>Greencloth</li>
|
14
|
+
</ul>
|
15
|
+
html
|
16
|
+
|
17
|
+
Undress(code).to_textile
|
18
|
+
|
19
|
+
Will produce
|
20
|
+
|
21
|
+
h1. Hello world!
|
22
|
+
|
23
|
+
*Hey!* How is it going?
|
24
|
+
|
25
|
+
h2. Supported Markup Languages so far:
|
26
|
+
|
27
|
+
* Textile
|
28
|
+
* Greencloth
|
29
|
+
|
30
|
+
== Supported Markup Languages
|
31
|
+
|
32
|
+
* Textile
|
33
|
+
* Greencloth, see [http://we.riseup.net]
|
34
|
+
|
35
|
+
== Get it
|
36
|
+
|
37
|
+
gem install zevarito-undress
|
38
|
+
|
39
|
+
== License
|
40
|
+
|
41
|
+
Authors:: Nicolas Sanguinetti (foca[http://github.com/foca]), Alvaro Gil (zevarito[http://github.com/zevarito])
|
42
|
+
License:: MIT (Check LICENSE for details)
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require "rake/testtask"

# Prefer the Hanna RDoc task when it is installed; otherwise fall back to the
# RDoc task that ships with Rake.
begin
  require "hanna/rdoctask"
rescue LoadError
  require "rake/rdoctask"
end

Rake::RDocTask.new do |rd|
  # The main page must name a file that is part of rdoc_files; the README in
  # this project is README.rdoc.
  rd.main = "README.rdoc"
  rd.title = "API Documentation for Undress"
  rd.rdoc_files.include("README.rdoc", "LICENSE", "lib/**/*.rb")
  rd.rdoc_dir = "doc"
end

# metric_fu is optional: its tasks are simply unavailable when the gem is
# missing.
begin
  require "metric_fu"
rescue LoadError
end

# mg is optional as well; when present it is handed the gemspec.
begin
  require "mg"
  MG.new("undress.gemspec")
rescue LoadError
end

desc "Default: run tests"
task :default => :test

Rake::TestTask.new do |t|
  t.test_files = FileList["test/test_*.rb"]
end
|
data/lib/hpricot_ext.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
require "hpricot"
|
2
|
+
|
3
|
+
module ::Hpricot #:nodoc:
|
4
|
+
# Convenience accessors for single CSS properties of an element's inline
# style. Every method delegates to the object returned by +styles+.
#
# Property names are normalised with +to_s+ in all four methods, so a name
# given as a Symbol to +set_style+ can be read back, tested and deleted with
# the same Symbol.
module Elem::Trav
  # Set the property +name+ to +value+. +fast_xs+ is called on the value
  # before it is stored (presumably XML-escaping it -- it is not defined in
  # this file).
  def set_style(name, value)
    styles[name.to_s] = value.fast_xs
  end

  # Remove the property +name+ from the inline style.
  def del_style(name)
    styles.delete(name.to_s)
  end

  # Tell whether the inline style declares the property +name+.
  def has_style?(name)
    styles.has_style?(name.to_s)
  end

  # Value of the property +name+, or +nil+ when it is not declared.
  def get_style(name)
    styles[name.to_s]
  end
end
|
21
|
+
|
22
|
+
# Hash-like view over the inline "style" attribute of an element.
#
# The wrapped element only needs to respond to +has_attribute?+,
# +get_attribute+ and +set_attribute+. Nothing is cached: every read parses
# the attribute again and every write serialises the declarations back into
# it as "name:value" pairs joined by ";".
class Styles
  def initialize e
    @element = e
  end

  # Remove the property +key+ and rewrite the attribute. Returns +nil+
  # without touching the element when the property is not declared.
  def delete(key)
    remaining = properties
    return unless remaining.delete(key.to_s)
    store remaining
  end

  # Value of the property +key+, or +nil+ when it is not declared.
  def [] key
    properties[key.to_s]
  end

  # Set the property +k+ to +v+. An existing declaration of the same
  # property is replaced instead of a duplicate being appended.
  def []= k, v
    updated = properties
    updated[k.to_s] = v
    store updated
  end

  # +true+ when no property is declared, +false+ otherwise.
  def empty?
    properties.empty?
  end

  # Tell whether the property +key+ is declared.
  def has_style?(key)
    properties.has_key?(key.to_s)
  end

  def to_s
    properties.to_s
  end

  def to_h
    properties
  end

  # Parse the "style" attribute into a Hash of property name => value.
  #
  # Only the first ":" of a declaration separates name from value, so values
  # that contain a colon themselves (such as "url(http://...)") are kept
  # whole. Empty declarations, as left by a trailing or doubled ";", and
  # declarations without a ":" are skipped.
  def properties
    return {} if not @element.has_attribute?("style")

    @element.get_attribute("style").to_s.split(";").inject({}) do |hash, declaration|
      name, value = declaration.split(":", 2)
      next hash if name.nil? || value.nil? || name.strip.empty?
      hash.update name.strip => value.strip
    end
  end

  private

  # Serialise +props+ and write the result to the "style" attribute.
  def store(props)
    @element.set_attribute("style", props.map {|pty, val| "#{pty}:#{val}"}.join(";"))
  end
end
|
67
|
+
|
68
|
+
class Elem #:nodoc:
  # Collect the chain of parents of this element, nearest first. A node is
  # only added while it has a parent of its own, so the topmost node of the
  # chain is left out.
  def ancestors
    node, ancestors = parent, Elements[]
    while node.respond_to?(:parent) && node.parent
      ancestors << node
      node = node.parent
    end
    ancestors
  end

  # Rename this element to +new_tag+. Unless +preserve_attr+ is true, every
  # attribute is removed as well. Nothing happens when +etag+ is not set.
  def change_tag!(new_tag, preserve_attr = true)
    return if not etag
    self.name = new_tag
    # Go through to_hash, as Grammar#attributes does, and walk a snapshot of
    # the keys so that removing attributes cannot disturb the iteration.
    attributes.to_hash.keys.each {|key| remove_attribute(key)} if not preserve_attr
  end

  # A Styles wrapper around this element's inline style.
  def styles
    Styles.new self
  end
end
|
88
|
+
end
|
data/lib/undress.rb
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/hpricot_ext")
|
2
|
+
require File.expand_path(File.dirname(__FILE__) + "/core_ext/object")
|
3
|
+
require File.expand_path(File.dirname(__FILE__) + "/undress/grammar")
|
4
|
+
|
5
|
+
# Load an HTML document so you can undress it. Pass it either a string or an IO
# object. The optional hash of options is forwarded straight to Hpricot; check
# its documentation[http://code.whytheluckystiff.net/doc/hpricot] for details.
#
# Returns an Undress::Document wrapping the parsed HTML.
def Undress(html, options = {})
  Undress::Document.new(html, options)
end
|
12
|
+
|
13
|
+
module Undress
|
14
|
+
|
15
|
+
# Tag names checked by Document#cleanup_indentation: a whitespace-only text
# node is kept when the node that follows it carries one of these names.
INLINE_ELEMENTS = %w[span b strong i em ins del strike abbr acronym cite code label sub sup]
|
16
|
+
|
17
|
+
# Register a markup language under +name+. The name becomes part of the
# conversion method: registering +:textile+ gives you
# <tt>Undress(code).to_textile</tt>, registering +:markdown+ would give you
# <tt>Undress(code).to_markdown</tt>, and so on.
#
# +grammar+ is handed to Document.add_markup unchanged.
def self.add_markup(name, grammar)
  Document.add_markup(name, grammar)
end
|
24
|
+
|
25
|
+
class Document #:nodoc:
  # Parse +html+ with Hpricot (forwarding +options+), then normalise the
  # resulting tree: first the markup fix-ups, then the whitespace cleanup.
  def initialize(html, options)
    @doc = Hpricot(html, options)
    xhtmlize!
    cleanup_indentation
  end

  # Define <tt>to_<name></tt>, which runs +grammar+ over the parsed document.
  def self.add_markup(name, grammar)
    define_method("to_#{name}") { grammar.process!(@doc) }
  end

  private

  # Repair markup that is not written as XHTML expects and that the grammars
  # could not parse correctly otherwise.
  def xhtmlize!
    (@doc/"ul|ol").each do |list|
      fixup_list(list) if list.parent != "li" && list.parent.name !~ /ul|ol/
    end
    (@doc/"p|span").each { |styled| fixup_span_with_styles(styled) }
    (@doc/"strike").each { |struck| struck.change_tag! "del" }
    (@doc/"u").each { |underlined| underlined.change_tag! "ins" }
    (@doc/"td|th").each { |cell| fixup_cells(cell) }
  end

  # Strip tabs and newlines and squeeze runs of whitespace into one space,
  # everywhere except inside <pre> or <code> elements.
  def cleanup_indentation
    (@doc/"*").each do |node|
      if node.elem? && node.inner_html != "" && node.name !~ (/pre|code/) && node.children.size == 0
        node.inner_html = collapse_whitespace(node.inner_html)
      elsif node.text? && node.parent.name !~ /pre|code/
        node.content = collapse_whitespace(node.content)
        # A whitespace-only text node is emptied unless the node after it is
        # one of INLINE_ELEMENTS (or there is no node after it).
        follower = node.next_node
        node.content = node.content.gsub(/^\s+$/, "") if follower && !INLINE_ELEMENTS.include?(follower.name)
      end
    end
  end

  # Remove newlines and tabs, then turn each run of whitespace into a space.
  def collapse_whitespace(text)
    text.gsub(/\n|\t/,"").gsub(/\s+/," ")
  end

  # Elements such as <span> are often used by wysiwyg editors to express
  # bold, italic and the like through inline styles. Each recognised style is
  # turned into the matching tag wrapped around the content and then removed
  # from the style attribute. A <span> left without any style is replaced by
  # its own content.
  def fixup_span_with_styles(e)
    return unless e.has_attribute?("style")

    # [property, value, wrapping tag], applied in this order.
    [
      ["font-style",      "italic",       "em"],
      ["text-decoration", "underline",    "ins"],
      ["text-decoration", "line-through", "del"],
      ["font-weight",     "bold",         "strong"]
    ].each do |property, value, tag|
      next unless e.get_style(property) == value
      e.inner_html = "<#{tag}>#{e.inner_html}</#{tag}>"
      e.del_style(property)
    end

    e.swap e.inner_html if e.styles.empty? && e.name == "span"
  end

  # Fix a badly nested list, such as a <ul> placed as a sibling of an <li>
  # instead of inside it: the list is appended to a neighbouring <li> (the
  # previous one wins over the next one) and removed from its old position.
  def fixup_list(list)
    list.children.each do |child|
      fixup_list(child) if child.elem? && child.name =~ /ol|ul/
    end

    return if list.parent.name == "li"

    following, preceding = list.next_sibling, list.previous_sibling
    li_side = following if following && following.name == "li"
    li_side = preceding if preceding && preceding.name == "li"
    return unless li_side

    li_side.inner_html = "#{li_side.inner_html}#{list.to_html}"
    list.parent.replace_child(list, "")
  end

  # Spaces between td and th elements break textile formatting, and <br>
  # is not allowed inside a cell: drop the <br> elements, blank the text
  # nodes around the cell and strip the cell's own content.
  def fixup_cells(e)
    e.search("br").remove

    after = e.next_node
    after.content = "" if after && after.text?
    before = e.previous_node
    before.content = "" if before && before.text?

    trimmed = e.inner_html.gsub(/\&nbsp\;/,"\s").strip
    e.inner_html = trimmed == "" ? [] : trimmed
  end
end
|
118
|
+
end
|
@@ -0,0 +1,188 @@
|
|
1
|
+
module Undress
|
2
|
+
# Grammars give you a DSL to declare how to convert an HTML document into a
# different markup language.
class Grammar
  # Give each subclass its own copy of the pre- and post-processing rules
  # known to its parent at the time the subclass is created. Copying, rather
  # than sharing the same hash, keeps rules registered on a subclass from
  # leaking into the parent and into sibling grammars.
  def self.inherited(base) # :nodoc:
    super
    base.instance_variable_set(:@post_processing_rules, post_processing_rules.dup)
    base.instance_variable_set(:@pre_processing_rules, pre_processing_rules.dup)
  end

  # Add a parsing rule for a group of html tags.
  #
  #     rule_for :p do |element|
  #       "<this was a paragraph>#{content_of(element)}</this was a paragraph>"
  #     end
  #
  # will replace your <tt><p></tt> tags for <tt><this was a paragraph></tt>
  # tags, without altering the contents.
  #
  # The element yielded to the block is an Hpricot element for the given tag.
  def self.rule_for(*tags, &handler) # :yields: element
    tags.each do |tag|
      define_method tag.to_sym, &handler
    end
  end

  # Set a default rule for unrecognized tags.
  #
  # Unless you define a special case, it will ignore the tags and just output
  # the contents of unrecognized tags.
  def self.default(&handler) # :yields: element
    define_method :method_missing do |tag, node, *args|
      handler.call(node)
    end
  end

  # Add a post-processing rule to your parser.
  #
  # This takes a regular expression that will be applied to the output after
  # processing any nodes. It can take a string as a replacement, or a block
  # that will be passed to String#gsub.
  #
  #     post_processing(/\n\n+/, "\n\n") # compress more than two newlines
  #     post_processing(/whatever/) { ... }
  def self.post_processing(regexp, replacement = nil, &handler) #:yields: matched_string
    post_processing_rules[regexp] = replacement || handler
  end

  # Add a pre-processing rule to your parser.
  #
  # This lets you mutate the DOM before applying any rule defined with
  # +rule_for+. You need to pass a CSS/XPath selector, and a block that
  # takes an Hpricot element to parse it.
  #
  #     pre_processing "ul.toc" do |element|
  #       element.swap("<p>[[toc]]</p>")
  #     end
  #
  # Would replace any unordered lists with the class +toc+ for a
  # paragraph containing the code <tt>[[toc]]</tt>.
  def self.pre_processing(selector, &handler) # :yields: element
    pre_processing_rules[selector] = handler
  end

  # Set a list of attributes you wish to whitelist
  #
  # Any attribute not in this list at the moment of parsing will be ignored by the
  # parser. The method Grammar#attributes(node) will return a hash of the filtered
  # attributes. Read its documentation for more details.
  #
  #     whitelist_attributes :id, :class, :lang
  def self.whitelist_attributes(*attrs)
    @whitelisted_attributes = attrs
  end

  def self.whitelisted_attributes #:nodoc:
    @whitelisted_attributes || []
  end

  def self.post_processing_rules #:nodoc:
    @post_processing_rules ||= {}
  end

  def self.pre_processing_rules #:nodoc:
    @pre_processing_rules ||= {}
  end

  def self.process!(node) #:nodoc:
    new.process!(node)
  end

  attr_reader :pre_processing_rules #:nodoc:
  attr_reader :post_processing_rules #:nodoc:
  attr_reader :whitelisted_attributes #:nodoc:

  # Every instance works on its own copies of the class-level settings.
  def initialize #:nodoc:
    @pre_processing_rules = self.class.pre_processing_rules.dup
    @post_processing_rules = self.class.post_processing_rules.dup
    @whitelisted_attributes = self.class.whitelisted_attributes.dup
  end

  # Process a DOM node, converting it to your markup language according to
  # your defined rules. If the node is a Text node, it will return its
  # string representation. Otherwise it will call the rule defined for it.
  #
  # When a constant ALLOWED_TAGS is visible from here and is not empty,
  # elements whose name is not listed in it produce no output. Nodes that are
  # neither text nor elements produce an empty string.
  def process(nodes)
    Array(nodes).map do |node|
      if node.text?
        node.to_html
      elsif node.elem?
        send node.name.to_sym, node if ! defined?(ALLOWED_TAGS) || ALLOWED_TAGS.empty? || ALLOWED_TAGS.include?(node.name)
      else
        ""
      end
    end.join("")
  end

  # Run the pre-processing rules against +node+, convert its children and
  # then apply the post-processing rules to the resulting text in place.
  def process!(node) #:nodoc:
    pre_processing_rules.each do |selector, handler|
      node.search(selector).each(&handler)
    end

    process(node.children).tap do |text|
      post_processing_rules.each do |rule, handler|
        handler.is_a?(String) ? text.gsub!(rule, handler) : text.gsub!(rule, &handler)
      end
    end
  end

  # Get the result of parsing the contents of a node.
  def content_of(node)
    process(node.respond_to?(:children) ? node.children : node)
  end

  # Helper method that tells you if the given DOM node is immediately
  # surrounded by whitespace.
  def surrounded_by_whitespace?(node)
    (node.previous && node.previous.text? && node.previous.to_s =~ /\s+$/) ||
      (node.next && node.next.text? && node.next.to_s =~ /^\s+/)
  end

  # Helper to determine if a node contains a whole word, useful to convert
  # for example a single italic letter inside a word.
  #
  # The node counts as a whole word when it has no neighbours at all, or when
  # the neighbour before it ends with whitespace and the one after it starts
  # with whitespace. A missing neighbour on one side is not checked.
  def complete_word?(node)
    p, n = node.previous_node, node.next_node

    return true if !p && !n

    if p.respond_to?(:content)
      return false if p.content !~ /\s$/
    elsif p.respond_to?(:inner_html)
      return false if p.inner_html !~ /\s$/
    end

    if n.respond_to?(:content)
      return false if n.content !~ /^\s/
    elsif n.respond_to?(:inner_html)
      return false if n.inner_html !~ /^\s/
    end

    true
  end

  # Hash of attributes, according to the white list. By default, no attributes
  # are whitelisted, so you must set which ones to whitelist on each grammar.
  #
  # Supposing you set <tt>:id</tt> and <tt>:class</tt> as your
  # <tt>whitelisted_attributes</tt>, and you have a node representing this
  # HTML:
  #
  #     <p lang="en" class="greeting">Hello World</p>
  #
  # Then the method would return:
  #
  #     { :class => "greeting" }
  #
  # You can override this method in each grammar and call +super+ if you
  # will represent your attributes consistently across all nodes (for
  # example, +Textile+ always shows class and id inside parentheses.)
  def attributes(node)
    node.attributes.to_hash.inject({}) do |attrs,(key,value)|
      attrs[key.to_sym] = value if whitelisted_attributes.include?(key.to_sym)
      attrs
    end
  end

  # Fallback for tags without a rule: output the converted children of the
  # node. Calls that do not carry a node (anything that does not respond to
  # +children+) are not tag dispatches, so they are passed on and end in the
  # usual NoMethodError.
  def method_missing(tag, *args) #:nodoc:
    node = args.first
    return super unless node.respond_to?(:children)
    process(node.children)
  end
end
|
188
|
+
end
|