junebug 0.0.14 → 0.0.15
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +8 -0
- data/README +7 -7
- data/RELEASE_NOTES +7 -0
- data/Rakefile +6 -3
- data/bin/junebug +0 -5
- data/deploy/static/style/base.css +12 -3
- data/deploy/wiki +1 -2
- data/lib/junebug.rb +2 -0
- data/lib/junebug/controllers.rb +12 -12
- data/lib/junebug/generator.rb +18 -1
- data/lib/junebug/models.rb +7 -3
- data/lib/junebug/redcloth.rb +5 -0
- data/lib/junebug/redcloth/all_formats.rb +4 -0
- data/lib/junebug/redcloth/base.rb +674 -0
- data/lib/junebug/redcloth/docbook.rb +1006 -0
- data/lib/junebug/redcloth/markdown.rb +138 -0
- data/lib/junebug/redcloth/textile.rb +449 -0
- data/lib/junebug/views.rb +3 -3
- data/test/wiki_test.rb +128 -40
- metadata +11 -3
data/CHANGELOG
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
v0.0.15 2006-11-17
|
2
|
+
|
3
|
+
* Loosen up mongrel requirements for windows users -- thanks deejay
|
4
|
+
* Bugfix for static file issue #6536. Thanks zimbatm
|
5
|
+
* Added css for wrapping pre text
|
6
|
+
* Changed wikiword syntax to [[link]]
|
7
|
+
* Unit testing improvements
|
8
|
+
|
1
9
|
v0.0.14 2006-11-14
|
2
10
|
|
3
11
|
* Page title cleanup
|
data/README
CHANGED
@@ -32,27 +32,27 @@ To create your Junebug wiki:
|
|
32
32
|
This creates a directory 'testwiki' with the necessary files.
|
33
33
|
|
34
34
|
> cd testwiki
|
35
|
-
>
|
35
|
+
> ruby wiki run
|
36
36
|
|
37
37
|
View your new wiki at: http://localhost:3301
|
38
38
|
|
39
39
|
Once everything seems to be running fine, you can set the wiki to run in the background. Hit ctrl-C to kill the wiki, and then type
|
40
40
|
|
41
|
-
>
|
41
|
+
> ruby wiki start
|
42
42
|
|
43
43
|
You can change default configuration (host, port, startpage, etc.. ) by editing the config.yml file. For the changes to take effect, just restart the wiki:
|
44
44
|
|
45
|
-
>
|
45
|
+
> ruby wiki restart
|
46
46
|
|
47
47
|
|
48
48
|
== Notes
|
49
49
|
|
50
50
|
Starting and stopping the wiki:
|
51
51
|
|
52
|
-
>
|
53
|
-
>
|
54
|
-
>
|
55
|
-
>
|
52
|
+
> ruby wiki start
|
53
|
+
> ruby wiki stop
|
54
|
+
> ruby wiki restart
|
55
|
+
> ruby wiki run
|
56
56
|
|
57
57
|
== Credits
|
58
58
|
|
data/RELEASE_NOTES
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
v0.0.15
|
2
|
+
|
3
|
+
The wikiword syntax changed in this release from just straight WikiLink to [[WikiLink]]. Just using camelcase was having trouble coexisting with textile, so I opted instead for the instiki convention which is less problematic.
|
4
|
+
|
5
|
+
This change will require updating any pre-existing pages to the new wikiword syntax.
|
6
|
+
|
7
|
+
Note that this format also supports the [[WikiLink|different label]] syntax.
|
data/Rakefile
CHANGED
@@ -7,11 +7,13 @@ require 'rake/gempackagetask'
|
|
7
7
|
require 'rake/testtask'
|
8
8
|
require 'rake/rdoctask'
|
9
9
|
|
10
|
+
require 'junebug'
|
11
|
+
|
10
12
|
Gem.manage_gems
|
11
13
|
|
12
14
|
gem_spec = Gem::Specification.new do |s|
|
13
15
|
s.name = 'junebug'
|
14
|
-
s.version = '0.0.
|
16
|
+
s.version = '0.0.15'
|
15
17
|
s.summary = "Junebug is a minimalist ruby wiki."
|
16
18
|
s.description = "Junebug is a minimalist ruby wiki running on Camping."
|
17
19
|
s.author = "Tim Myrtle"
|
@@ -21,10 +23,10 @@ gem_spec = Gem::Specification.new do |s|
|
|
21
23
|
s.require_paths = ['lib']
|
22
24
|
s.bindir = 'bin'
|
23
25
|
s.executables = ['junebug']
|
24
|
-
s.files = FileList['README','LICENSE','CHANGELOG','Rakefile','lib/**/*','deploy/**/*','dump/**/*']
|
26
|
+
s.files = FileList['README', 'LICENSE', 'CHANGELOG', 'RELEASE_NOTES', 'Rakefile', 'lib/**/*', 'deploy/**/*', 'dump/**/*']
|
25
27
|
s.test_files = FileList['test/**/*']
|
26
28
|
|
27
|
-
s.add_dependency('mongrel', '>=0.3.13.
|
29
|
+
s.add_dependency('mongrel', '>=0.3.13.3')
|
28
30
|
s.add_dependency('camping', '>=1.5')
|
29
31
|
s.add_dependency('RedCloth', '>=3.0.4')
|
30
32
|
s.add_dependency('daemons')
|
@@ -51,6 +53,7 @@ task :clean => :clobber_package do
|
|
51
53
|
rm 'deploy/junebug.log', :force => true
|
52
54
|
Dir['deploy/dump/*'].each { |ext| rm ext }
|
53
55
|
rm 'test/test.log', :force => true
|
56
|
+
rm 'config.yml', :force => true
|
54
57
|
end
|
55
58
|
|
56
59
|
|
data/bin/junebug
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
/* Baseline Junebug style */
|
1
2
|
|
2
3
|
a:link, a:visited {
|
3
4
|
text-decoration: none;
|
@@ -8,7 +9,6 @@ a:hover {
|
|
8
9
|
color: #00f;
|
9
10
|
}
|
10
11
|
|
11
|
-
|
12
12
|
form { display: inline; }
|
13
13
|
|
14
14
|
span.actions a {
|
@@ -146,7 +146,7 @@ body {
|
|
146
146
|
|
147
147
|
/* WIKI CONTENT STYLES */
|
148
148
|
.content {
|
149
|
-
font-size:
|
149
|
+
font-size: 107%;
|
150
150
|
padding: 25px;
|
151
151
|
min-height: 300px;
|
152
152
|
}
|
@@ -233,6 +233,16 @@ body {
|
|
233
233
|
padding: 8px 20px;
|
234
234
|
}
|
235
235
|
|
236
|
+
/* http://longren.org/2006/09/27/wrapping-text-inside-pre-tags */
|
237
|
+
/* Browser specific (not valid) styles to make preformatted text wrap */
|
238
|
+
.content pre {
|
239
|
+
white-space: pre-wrap; /* css-3 */
|
240
|
+
white-space: -moz-pre-wrap; /* Mozilla, since 1999 */
|
241
|
+
white-space: -pre-wrap; /* Opera 4-6 */
|
242
|
+
white-space: -o-pre-wrap; /* Opera 7 */
|
243
|
+
word-wrap: break-word; /* Internet Explorer 5.5+ */
|
244
|
+
}
|
245
|
+
|
236
246
|
.content table {
|
237
247
|
}
|
238
248
|
|
@@ -245,4 +255,3 @@ body {
|
|
245
255
|
font-weight: bold;
|
246
256
|
background-color: #d7e0ff;
|
247
257
|
}
|
248
|
-
/* http://longren.org/2006/09/27/wrapping-text-inside-pre-tags */
|
data/deploy/wiki
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
2
|
$:.unshift File.dirname(__FILE__) + "/../lib"
|
4
3
|
|
5
4
|
require 'rubygems'
|
@@ -9,5 +8,5 @@ require 'junebug/config'
|
|
9
8
|
|
10
9
|
JUNEBUG_ROOT = ENV['JUNEBUG_ROOT'] = File.dirname(File.expand_path(__FILE__))
|
11
10
|
|
12
|
-
Daemons.run(Junebug::Config.script, :dir_mode=>:normal, :dir=>
|
11
|
+
Daemons.run(Junebug::Config.script, :dir_mode=>:normal, :dir=>JUNEBUG_ROOT)
|
13
12
|
|
data/lib/junebug.rb
CHANGED
data/lib/junebug/controllers.rb
CHANGED
@@ -9,17 +9,17 @@ module Junebug::Controllers
|
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
-
class Show < R '/(\w+)', '/(\w+)/(\d+)'
|
12
|
+
class Show < R '/([\w ]+)', '/([\w ]+)/(\d+)'
|
13
13
|
def get page_name, version = nil
|
14
14
|
@page_title = page_name
|
15
15
|
#redirect(Edit, page_name, 1) and return unless @page = Page.find_by_title(page_name)
|
16
|
-
redirect("#{Junebug.config['url']}/#{page_name}/1/edit") and return unless @page = Page.find_by_title(page_name)
|
16
|
+
redirect("#{Junebug.config['url']}/#{page_name.gsub(/ /,'+')}/1/edit") and return unless @page = Page.find_by_title(page_name)
|
17
17
|
@version = (version.nil? or version == @page.version.to_s) ? @page : @page.versions.find_by_version(version)
|
18
18
|
render :show
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
-
class Edit < R '/(\w+)/edit', '/(\w+)/(\d+)/edit'
|
22
|
+
class Edit < R '/([\w ]+)/edit', '/([\w ]+)/(\d+)/edit'
|
23
23
|
def get page_name, version = nil
|
24
24
|
redirect("#{Junebug.config['url']}/login") and return unless logged_in?
|
25
25
|
@page_title = "Edit #{page_name}"
|
@@ -36,15 +36,15 @@ module Junebug::Controllers
|
|
36
36
|
attrs[:readonly] = input.post_readonly if is_admin?
|
37
37
|
if Page.find_or_create_by_title(page_name).update_attributes( attrs )
|
38
38
|
# redirect Show, input.post_title
|
39
|
-
redirect "#{Junebug.config['url']}/#{input.post_title}"
|
39
|
+
redirect "#{Junebug.config['url']}/#{input.post_title.gsub(/ /,'+')}"
|
40
40
|
end
|
41
41
|
else
|
42
|
-
redirect "#{Junebug.config['url']}/#{page_name}"
|
42
|
+
redirect "#{Junebug.config['url']}/#{page_name.gsub(/ /,'+')}"
|
43
43
|
end
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
|
-
class Delete < R '/(\w+)/delete'
|
47
|
+
class Delete < R '/([\w ]+)/delete'
|
48
48
|
def get page_name
|
49
49
|
redirect("#{Junebug.config['url']}/login") and return unless logged_in?
|
50
50
|
Page.find_by_title(page_name).destroy() if is_admin?
|
@@ -53,15 +53,15 @@ module Junebug::Controllers
|
|
53
53
|
|
54
54
|
end
|
55
55
|
|
56
|
-
class Revert < R '/(\w+)/(\d+)/revert'
|
56
|
+
class Revert < R '/([\w ]+)/(\d+)/revert'
|
57
57
|
def get page_name, version
|
58
58
|
redirect("#{Junebug.config['url']}/login") and return unless logged_in?
|
59
59
|
Page.find_by_title(page_name).revert_to!(version) if is_admin?
|
60
|
-
redirect "#{Junebug.config['url']}/#{page_name}"
|
60
|
+
redirect "#{Junebug.config['url']}/#{page_name.gsub(/ /,'+')}"
|
61
61
|
end
|
62
62
|
end
|
63
63
|
|
64
|
-
class Versions < R '/(\w+)/versions'
|
64
|
+
class Versions < R '/([\w ]+)/versions'
|
65
65
|
def get page_name
|
66
66
|
@page_title = "Version History: #{page_name}"
|
67
67
|
@page = Page.find_by_title(page_name)
|
@@ -78,7 +78,7 @@ module Junebug::Controllers
|
|
78
78
|
end
|
79
79
|
end
|
80
80
|
|
81
|
-
class Backlinks < R '/(\w+)/backlinks'
|
81
|
+
class Backlinks < R '/([\w ]+)/backlinks'
|
82
82
|
def get page_name
|
83
83
|
@page = Page.find_by_title(page_name)
|
84
84
|
@page_title = "Backlinks for: #{page_name}"
|
@@ -95,7 +95,7 @@ module Junebug::Controllers
|
|
95
95
|
end
|
96
96
|
end
|
97
97
|
|
98
|
-
class Diff < R '/(\w+)/(\d+)/(\d+)/diff'
|
98
|
+
class Diff < R '/([\w ]+)/(\d+)/(\d+)/diff'
|
99
99
|
include HTMLDiff
|
100
100
|
def get page_name, v1, v2
|
101
101
|
@page_title = "Diff: #{page_name}"
|
@@ -122,7 +122,7 @@ module Junebug::Controllers
|
|
122
122
|
class Static < R '/static/(.+)'
|
123
123
|
MIME_TYPES = {'.css' => 'text/css', '.js' => 'text/javascript', '.jpg' => 'image/jpeg'}
|
124
124
|
#PATH = __FILE__[/(.*)\//, 1]
|
125
|
-
PATH = '.'
|
125
|
+
PATH = ENV['JUNEBUG_ROOT'] || '.'
|
126
126
|
|
127
127
|
def get(path)
|
128
128
|
@headers['Content-Type'] = MIME_TYPES[path[/\.\w+$/, 0]] || "text/plain"
|
data/lib/junebug/generator.rb
CHANGED
@@ -6,8 +6,25 @@ require 'junebug/models'
|
|
6
6
|
module Junebug
|
7
7
|
module Generator
|
8
8
|
extend self
|
9
|
-
|
9
|
+
|
10
10
|
def generate(args)
|
11
|
+
if args.empty? || (args[0] == '-h') || (args[0] == '--help')
|
12
|
+
puts <<END
|
13
|
+
Usage: junebug [options|wikiname]
|
14
|
+
|
15
|
+
Options:
|
16
|
+
-v, --version Display version
|
17
|
+
-h, --help Display this page
|
18
|
+
|
19
|
+
END
|
20
|
+
return
|
21
|
+
end
|
22
|
+
|
23
|
+
if (args[0] == '-v') || (args[0] == '--version')
|
24
|
+
puts "Junebug v#{Junebug::VERSION}"
|
25
|
+
return
|
26
|
+
end
|
27
|
+
|
11
28
|
src_root = File.dirname(__FILE__) + '/../../deploy'
|
12
29
|
app = ARGV.first
|
13
30
|
FileUtils.cp_r(src_root, app)
|
data/lib/junebug/models.rb
CHANGED
@@ -26,14 +26,18 @@ module Junebug::Models
|
|
26
26
|
|
27
27
|
class Page < Base
|
28
28
|
belongs_to :user
|
29
|
-
|
29
|
+
PAGE_LINK = /\[\[([^\]|]*)[|]?([^\]]*)\]\]/
|
30
30
|
#before_save { |r| r.title = r.title.underscore }
|
31
|
-
PAGE_LINK = /([A-Z][a-z]+[A-Z]\w+)/
|
31
|
+
#PAGE_LINK = /([A-Z][a-z]+[A-Z]\w+)/
|
32
32
|
validates_uniqueness_of :title
|
33
|
-
validates_format_of :title, :with =>
|
33
|
+
validates_format_of :title, :with => /^[\w ]+$/
|
34
34
|
validates_presence_of :title
|
35
35
|
acts_as_versioned
|
36
36
|
non_versioned_fields.push 'title'
|
37
|
+
|
38
|
+
def title=(text)
|
39
|
+
write_attribute(:title, text ? text.strip.squeeze(' ') : text)
|
40
|
+
end
|
37
41
|
end
|
38
42
|
|
39
43
|
class Page::Version < Base
|
@@ -0,0 +1,674 @@
|
|
1
|
+
class RedCloth < String
|
2
|
+
|
3
|
+
VERSION = '3.0.4'
|
4
|
+
DEFAULT_RULES = [] # let each class add to this array
|
5
|
+
TEXTILE_RULES = [:refs_textile, :block_textile_table, :block_textile_lists, :block_textile_defs,
|
6
|
+
:block_textile_prefix, :inline_textile_image, :inline_textile_link,
|
7
|
+
:inline_textile_code, :inline_textile_span, :glyphs_textile,
|
8
|
+
:inline_textile_autolink_urls, :inline_textile_autolink_emails]
|
9
|
+
MARKDOWN_RULES = [:refs_markdown, :block_markdown_setext, :block_markdown_atx, :block_markdown_rule,
|
10
|
+
:block_markdown_bq, :block_markdown_lists,
|
11
|
+
:inline_markdown_reflink, :inline_markdown_link]
|
12
|
+
DOCBOOK_RULES = [:refs_docbook, :block_docbook_table, :block_docbook_lists, :block_docbook_simple_lists,
|
13
|
+
:block_docbook_defs, :block_docbook_prefix, :inline_docbook_image, :inline_docbook_link,
|
14
|
+
:inline_docbook_code, :inline_docbook_glyphs, :inline_docbook_span,
|
15
|
+
:inline_docbook_wiki_words, :inline_docbook_wiki_links, :inline_docbook_autolink_urls,
|
16
|
+
:inline_docbook_autolink_emails]
|
17
|
+
@@escape_keyword ||= "redcloth"
|
18
|
+
|
19
|
+
#
|
20
|
+
# Four accessors for setting security restrictions.
|
21
|
+
#
|
22
|
+
# This is a nice thing if you're using RedCloth for
|
23
|
+
# formatting in public places (e.g. Wikis) where you
|
24
|
+
# don't want users to abuse HTML for bad things.
|
25
|
+
#
|
26
|
+
# If +:filter_html+ is set, HTML which wasn't
|
27
|
+
# created by the Textile processor will be escaped.
|
28
|
+
#
|
29
|
+
# If +:filter_styles+ is set, it will also disable
|
30
|
+
# the style markup specifier. ('{color: red}')
|
31
|
+
#
|
32
|
+
# If +:filter_classes+ is set, it will also disable
|
33
|
+
# class attributes. ('!(classname)image!')
|
34
|
+
#
|
35
|
+
# If +:filter_ids+ is set, it will also disable
|
36
|
+
# id attributes. ('!(classname#id)image!')
|
37
|
+
#
|
38
|
+
attr_accessor :filter_html, :filter_styles, :filter_classes, :filter_ids
|
39
|
+
|
40
|
+
#
|
41
|
+
# Accessor for toggling hard breaks.
|
42
|
+
#
|
43
|
+
# If +:hard_breaks+ is set, single newlines will
|
44
|
+
# be converted to HTML break tags. This is the
|
45
|
+
# default behavior for traditional RedCloth.
|
46
|
+
#
|
47
|
+
attr_accessor :hard_breaks
|
48
|
+
|
49
|
+
# Accessor for toggling lite mode.
|
50
|
+
#
|
51
|
+
# In lite mode, block-level rules are ignored. This means
|
52
|
+
# that tables, paragraphs, lists, and such aren't available.
|
53
|
+
# Only the inline markup for bold, italics, entities and so on is processed.
|
54
|
+
#
|
55
|
+
# r = RedCloth.new( "And then? She *fell*!", [:lite_mode] )
|
56
|
+
# r.to_html
|
57
|
+
# #=> "And then? She <strong>fell</strong>!"
|
58
|
+
#
|
59
|
+
attr_accessor :lite_mode
|
60
|
+
|
61
|
+
#
|
62
|
+
# Accessor for toggling span caps.
|
63
|
+
#
|
64
|
+
# Textile places `span' tags around capitalized
|
65
|
+
# words by default, but this wreaks havoc on Wikis.
|
66
|
+
# If +:no_span_caps+ is set, this will be
|
67
|
+
# suppressed.
|
68
|
+
#
|
69
|
+
attr_accessor :no_span_caps
|
70
|
+
|
71
|
+
#
|
72
|
+
# Establishes the markup precedence.
|
73
|
+
#
|
74
|
+
attr_accessor :rules
|
75
|
+
|
76
|
+
# Returns a new RedCloth object, based on _string_ and
|
77
|
+
# enforcing all the included _restrictions_.
|
78
|
+
#
|
79
|
+
# r = RedCloth.new( "h1. A <b>bold</b> man", [:filter_html] )
|
80
|
+
# r.to_html
|
81
|
+
# #=>"<h1>A &lt;b&gt;bold&lt;/b&gt; man</h1>"
|
82
|
+
#
|
83
|
+
def initialize( string, restrictions = [] )
|
84
|
+
restrictions.each { |r| method( "#{ r }=" ).call( true ) }
|
85
|
+
super( string )
|
86
|
+
end
|
87
|
+
|
88
|
+
#
|
89
|
+
# Generates HTML from the Textile contents.
|
90
|
+
#
|
91
|
+
# r = RedCloth.new( "And then? She *fell*!" )
|
92
|
+
# r.to_html( true )
|
93
|
+
# #=>"And then? She <strong>fell</strong>!"
|
94
|
+
#
|
95
|
+
def to_html( *rules )
|
96
|
+
rules = DEFAULT_RULES if rules.empty?
|
97
|
+
# make our working copy
|
98
|
+
text = self.dup
|
99
|
+
|
100
|
+
return "" if text == ""
|
101
|
+
|
102
|
+
@urlrefs = {}
|
103
|
+
@shelf = []
|
104
|
+
@rules = rules.collect do |rule|
|
105
|
+
case rule
|
106
|
+
when :markdown
|
107
|
+
MARKDOWN_RULES
|
108
|
+
when :textile
|
109
|
+
TEXTILE_RULES
|
110
|
+
else
|
111
|
+
rule
|
112
|
+
end
|
113
|
+
end.flatten
|
114
|
+
|
115
|
+
# standard clean up
|
116
|
+
@pre_list = []
|
117
|
+
pre_process text
|
118
|
+
DEFAULT_RULES.each {|ruleset| send("#{ruleset}_pre_process", text) if private_methods.include? "#{ruleset}_pre_process"}
|
119
|
+
incoming_entities text
|
120
|
+
clean_white_space text
|
121
|
+
|
122
|
+
# start processor
|
123
|
+
no_textile text
|
124
|
+
rip_offtags text
|
125
|
+
hard_break text
|
126
|
+
unless @lite_mode
|
127
|
+
refs text
|
128
|
+
blocks text
|
129
|
+
end
|
130
|
+
inline text
|
131
|
+
smooth_offtags text
|
132
|
+
retrieve text
|
133
|
+
|
134
|
+
post_process text
|
135
|
+
DEFAULT_RULES.each {|ruleset| send("#{ruleset}_post_process", text) if private_methods.include? "#{ruleset}_post_process"}
|
136
|
+
|
137
|
+
clean_html text if filter_html
|
138
|
+
|
139
|
+
return text.strip
|
140
|
+
|
141
|
+
end
|
142
|
+
|
143
|
+
#######
|
144
|
+
private
|
145
|
+
#######
|
146
|
+
#
|
147
|
+
# Regular expressions to convert to HTML.
|
148
|
+
#
|
149
|
+
LB = "0docbook0line0break0"
|
150
|
+
NB = "0docbook0no0break0\n\n"
|
151
|
+
A_HLGN = /(?:(?:<>|<|>|\=|[()]+)+)/
|
152
|
+
A_VLGN = /[\-^~]/
|
153
|
+
C_CLAS = '(?:\([^)]+\))'
|
154
|
+
C_LNGE = '(?:\[[^\]]+\])'
|
155
|
+
C_STYL = '(?:\{[^}]+\})'
|
156
|
+
S_CSPN = '(?:\\\\\d+)'
|
157
|
+
S_RSPN = '(?:/\d+)'
|
158
|
+
A = "(?:#{A_HLGN}?#{A_VLGN}?|#{A_VLGN}?#{A_HLGN}?)"
|
159
|
+
S = "(?:#{S_CSPN}?#{S_RSPN}|#{S_RSPN}?#{S_CSPN}?)"
|
160
|
+
C = "(?:#{C_CLAS}?#{C_STYL}?#{C_LNGE}?|#{C_STYL}?#{C_LNGE}?#{C_CLAS}?|#{C_LNGE}?#{C_STYL}?#{C_CLAS}?)"
|
161
|
+
PUNCT = Regexp::quote( '!"#$%&\'*+,-./:;=?@\\^_`|~' )
|
162
|
+
PUNCT_NOQ = Regexp::quote( '!"#$&\',./:;=?@\\`|' )
|
163
|
+
PUNCT_Q = Regexp::quote( '*-_+^~%' )
|
164
|
+
HYPERLINK = '(\S+?)([^\w\s/;=\?]*?)(?=\s|<|$)'
|
165
|
+
|
166
|
+
TABLE_RE = /^(?:caption ?\{(.*?)\}\. ?\n)?^(?:id ?\{(.*?)\}\. ?\n)?^(?:table(_?#{S}#{A}#{C})\. ?\n)?^(#{A}#{C}\.? ?\|.*?\|)(\n\n|\Z)/m
|
167
|
+
LISTS_RE = /^([#*_0-9]+?#{C} .*?)$(?![^#*])/m
|
168
|
+
LISTS_CONTENT_RE = /^([#*]+)([_0-9]*)(#{A}#{C}) (.*)$/m
|
169
|
+
DEFS_RE = /^(-#{C}\s.*?\:\=.*?)$(?![^-])/m
|
170
|
+
DEFS_CONTENT_RE = /^(-)(#{A}#{C})\s+(.*?):=(.*)$/m
|
171
|
+
BACKTICK_CODE_RE = /(.*?)
|
172
|
+
```
|
173
|
+
(?:\|(\w+?)\|)?
|
174
|
+
(.*?[^\\])
|
175
|
+
```
|
176
|
+
(.*?)/mx
|
177
|
+
CODE_RE = /(.*?)
|
178
|
+
@@?
|
179
|
+
(?:\|(\w+?)\|)?
|
180
|
+
(.*?[^\\])
|
181
|
+
@@?
|
182
|
+
(.*?)/x
|
183
|
+
BLOCKS_GROUP_RE = /\n{2,}(?! )/m
|
184
|
+
BLOCK_RE = /^(([a-z]+)(\d*))(#{A}#{C})\.(?::(\S+))? (.*)$/
|
185
|
+
SETEXT_RE = /\A(.+?)\n([=-])[=-]* *$/m
|
186
|
+
ATX_RE = /\A(\#{1,6}) # $1 = string of #'s
|
187
|
+
[ ]*
|
188
|
+
(.+?) # $2 = Header text
|
189
|
+
[ ]*
|
190
|
+
\#* # optional closing #'s (not counted)
|
191
|
+
$/x
|
192
|
+
LINK_RE = /
|
193
|
+
([\s\[{(]|[#{PUNCT}])? # $pre
|
194
|
+
" # start
|
195
|
+
(#{C}) # $atts
|
196
|
+
([^"]+?) # $text
|
197
|
+
\s?
|
198
|
+
(?:\(([^)]+?)\)(?="))? # $title
|
199
|
+
":
|
200
|
+
([^\s<]+?) # $url
|
201
|
+
(\/)? # $slash
|
202
|
+
([^\w\/;]*?) # $post
|
203
|
+
(?=<|\s|$)
|
204
|
+
/x
|
205
|
+
IMAGE_RE = /
|
206
|
+
(<p>|.|^) # start of line?
|
207
|
+
\! # opening
|
208
|
+
(\<|\=|\>)? # optional alignment atts
|
209
|
+
(#{C}) # optional style,class atts
|
210
|
+
(?:\. )? # optional dot-space
|
211
|
+
([^\s(!]+?) # presume this is the src
|
212
|
+
\s? # optional space
|
213
|
+
(?:\(((?:[^\(\)]|\([^\)]+\))+?)\))? # optional title
|
214
|
+
\! # closing
|
215
|
+
(?::#{ HYPERLINK })? # optional href
|
216
|
+
/x
|
217
|
+
|
218
|
+
# Text markup tags, don't conflict with block tags
|
219
|
+
SIMPLE_HTML_TAGS = [
|
220
|
+
'tt', 'b', 'i', 'big', 'small', 'em', 'strong', 'dfn', 'code',
|
221
|
+
'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'a', 'img', 'br',
|
222
|
+
'br', 'map', 'q', 'sub', 'sup', 'span', 'bdo'
|
223
|
+
]
|
224
|
+
|
225
|
+
QTAGS = [
|
226
|
+
['**', 'b'],
|
227
|
+
['*', 'strong'],
|
228
|
+
['??', 'cite', :limit],
|
229
|
+
['-', 'del', :limit],
|
230
|
+
['__', 'i'],
|
231
|
+
['_', 'em', :limit],
|
232
|
+
['%', 'span', :limit],
|
233
|
+
['+', 'ins', :limit],
|
234
|
+
['^', 'sup'],
|
235
|
+
['~', 'sub']
|
236
|
+
]
|
237
|
+
QTAGS.collect! do |rc, ht, rtype|
|
238
|
+
rcq = Regexp::quote rc
|
239
|
+
re =
|
240
|
+
case rtype
|
241
|
+
when :limit
|
242
|
+
/(\W)
|
243
|
+
(#{rcq})
|
244
|
+
(#{C})
|
245
|
+
(?::(\S+?))?
|
246
|
+
(\S.*?\S|\S)
|
247
|
+
#{rcq}
|
248
|
+
(?=\W)/x
|
249
|
+
else
|
250
|
+
/(#{rcq})
|
251
|
+
(#{C})
|
252
|
+
(?::(\S+))?
|
253
|
+
(\S.*?\S|\S)
|
254
|
+
#{rcq}/xm
|
255
|
+
end
|
256
|
+
escaped_re =
|
257
|
+
case rtype
|
258
|
+
when :limit
|
259
|
+
/(\W)
|
260
|
+
(#{@@escape_keyword}#{rcq})
|
261
|
+
(#{C})
|
262
|
+
(?::(\S+?))?
|
263
|
+
(\S.*?\S|\S)
|
264
|
+
#{rcq}#{@@escape_keyword}
|
265
|
+
(?=\W)/x
|
266
|
+
else
|
267
|
+
/(#{@@escape_keyword}#{rcq})
|
268
|
+
(#{C})
|
269
|
+
(?::(\S+))?
|
270
|
+
(\S.*?\S|\S)
|
271
|
+
#{rcq}#{@@escape_keyword}/xm
|
272
|
+
end
|
273
|
+
[rc, ht, re, rtype, escaped_re]
|
274
|
+
end
|
275
|
+
|
276
|
+
# Elements to handle
|
277
|
+
GLYPHS = [
|
278
|
+
# [ /([^\s\[{(>])?\'([dmst]\b|ll\b|ve\b|\s|:|$)/, '\1’\2' ], # single closing
|
279
|
+
[ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)\'/, '\1’' ], # single closing
|
280
|
+
[ /\'(?=[#{PUNCT_Q}]*(s\b|[\s#{PUNCT_NOQ}]))/, '’' ], # single closing
|
281
|
+
[ /\'/, '‘' ], # single opening
|
282
|
+
# [ /([^\s\[{(])?"(\s|:|$)/, '\1”\2' ], # double closing
|
283
|
+
[ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)"/, '\1”' ], # double closing
|
284
|
+
[ /"(?=[#{PUNCT_Q}]*[\s#{PUNCT_NOQ}])/, '”' ], # double closing
|
285
|
+
[ /"/, '“' ], # double opening
|
286
|
+
[ /\b( )?\.{3}/, '\1…' ], # ellipsis
|
287
|
+
[ /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/, '<acronym title="\2">\1</acronym>' ], # 3+ uppercase acronym
|
288
|
+
[ /(^|[^"][>\s])([A-Z][A-Z0-9 ]+[A-Z0-9])([^<A-Za-z0-9]|$)/, '\1<span class="caps">\2</span>\3', :no_span_caps ], # 3+ uppercase caps
|
289
|
+
[ /(\.\s)?\s?--\s?/, '\1—' ], # em dash
|
290
|
+
[ /(^|\s)->(\s|$)/, ' → ' ], # right arrow
|
291
|
+
[ /(^|\s)-(\s|$)/, ' – ' ], # en dash
|
292
|
+
[ /(\d+) ?x ?(\d+)/, '\1×\2' ], # dimension sign
|
293
|
+
[ /\b ?[(\[]TM[\])]/i, '™' ], # trademark
|
294
|
+
[ /\b ?[(\[]R[\])]/i, '®' ], # registered
|
295
|
+
[ /\b ?[(\[]C[\])]/i, '©' ] # copyright
|
296
|
+
]
|
297
|
+
|
298
|
+
H_ALGN_VALS = {
|
299
|
+
'<' => 'left',
|
300
|
+
'=' => 'center',
|
301
|
+
'>' => 'right',
|
302
|
+
'<>' => 'justify'
|
303
|
+
}
|
304
|
+
|
305
|
+
V_ALGN_VALS = {
|
306
|
+
'^' => 'top',
|
307
|
+
'-' => 'middle',
|
308
|
+
'~' => 'bottom'
|
309
|
+
}
|
310
|
+
|
311
|
+
OFFTAGS = /(code|pre|kbd|notextile)/i
|
312
|
+
OFFTAG_MATCH = /(?:(<\/#{ OFFTAGS }>)|(<#{ OFFTAGS }[^>]*>))(.*?)(?=<\/?#{ OFFTAGS }|\Z)/mi
|
313
|
+
OFFTAG_OPEN = /<#{ OFFTAGS }/
|
314
|
+
OFFTAG_CLOSE = /<\/?#{ OFFTAGS }/
|
315
|
+
|
316
|
+
HASTAG_MATCH = /(<\/?\w[^\n]*?>)/m
|
317
|
+
ALLTAG_MATCH = /(<\/?\w[^\n]*?>)|.*?(?=<\/?\w[^\n]*?>|$)/m
|
318
|
+
|
319
|
+
def pre_process( text )
|
320
|
+
text.gsub!( /={2}\`\`\`={2}/, "XXXpreformatted_backticksXXX" )
|
321
|
+
end
|
322
|
+
|
323
|
+
def post_process( text )
|
324
|
+
text.gsub!( /XXXpreformatted_backticksXXX/, '```' )
|
325
|
+
text.gsub!( LB, "\n" )
|
326
|
+
text.gsub!( NB, "" )
|
327
|
+
text.gsub!( /<\/?notextile>/, '' )
|
328
|
+
text.gsub!( /x%x%/, '&' )
|
329
|
+
text << "</div>" if @div_atts
|
330
|
+
end
|
331
|
+
|
332
|
+
# Search and replace for glyphs (quotes, dashes, other symbols)
|
333
|
+
def pgl( text )
|
334
|
+
GLYPHS.each do |re, resub, tog|
|
335
|
+
next if tog and method( tog ).call
|
336
|
+
text.gsub! re, resub
|
337
|
+
end
|
338
|
+
end
|
339
|
+
|
340
|
+
# Parses attribute lists and builds an HTML attribute string
|
341
|
+
def pba( text_in, element = "" )
|
342
|
+
|
343
|
+
return '' unless text_in
|
344
|
+
|
345
|
+
style = []
|
346
|
+
text = text_in.dup
|
347
|
+
if element == 'td'
|
348
|
+
colspan = $1 if text =~ /\\(\d+)/
|
349
|
+
rowspan = $1 if text =~ /\/(\d+)/
|
350
|
+
style << "vertical-align:#{ v_align( $& ) };" if text =~ A_VLGN
|
351
|
+
end
|
352
|
+
|
353
|
+
style << "#{ $1 };" if not filter_styles and
|
354
|
+
text.sub!( /\{([^}]*)\}/, '' )
|
355
|
+
|
356
|
+
lang = $1 if
|
357
|
+
text.sub!( /\[([^)]+?)\]/, '' )
|
358
|
+
|
359
|
+
cls = $1 if
|
360
|
+
text.sub!( /\(([^()]+?)\)/, '' )
|
361
|
+
|
362
|
+
style << "padding-left:#{ $1.length }em;" if
|
363
|
+
text.sub!( /([(]+)/, '' )
|
364
|
+
|
365
|
+
style << "padding-right:#{ $1.length }em;" if text.sub!( /([)]+)/, '' )
|
366
|
+
|
367
|
+
style << "text-align:#{ h_align( $& ) };" if text =~ A_HLGN
|
368
|
+
|
369
|
+
cls, id = $1, $2 if cls =~ /^(.*?)#(.*)$/
|
370
|
+
|
371
|
+
atts = ''
|
372
|
+
atts << " style=\"#{ style.join }\"" unless style.empty?
|
373
|
+
atts << " class=\"#{ cls }\"" unless cls.to_s.empty? or filter_classes
|
374
|
+
atts << " lang=\"#{ lang }\"" if lang
|
375
|
+
atts << " id=\"#{ id }\"" if id and not filter_ids
|
376
|
+
atts << " colspan=\"#{ colspan }\"" if colspan
|
377
|
+
atts << " rowspan=\"#{ rowspan }\"" if rowspan
|
378
|
+
|
379
|
+
atts
|
380
|
+
end
|
381
|
+
|
382
|
+
#
|
383
|
+
# Flexible HTML escaping
|
384
|
+
#
|
385
|
+
def htmlesc( str, mode )
|
386
|
+
str.gsub!( '&', '&' )
|
387
|
+
str.gsub!( '"', '"' ) if mode != :NoQuotes
|
388
|
+
str.gsub!( "'", ''' ) if mode == :Quotes
|
389
|
+
str.gsub!( '<', '<')
|
390
|
+
str.gsub!( '>', '>')
|
391
|
+
end
|
392
|
+
|
393
|
+
def hard_break( text )
|
394
|
+
text.gsub!( /(.)\n(?!\n|\Z| *([#*=]+(\s|$)|[{|]))/, "\\1<br />" ) if hard_breaks
|
395
|
+
end
|
396
|
+
|
397
|
+
def lT( text )
|
398
|
+
text =~ /\#$/ ? 'o' : 'u'
|
399
|
+
end
|
400
|
+
|
401
|
+
BLOCK_GROUP_SPLITTER = "XXX_BLOCK_GROUP_XXX\n\n"
|
402
|
+
def blocks( text, deep_code = false )
|
403
|
+
@current_class ||= nil
|
404
|
+
|
405
|
+
# Find all occurrences of div(class). and process them as blocks
|
406
|
+
text.gsub!( /^div\((.*?)\)\.\s*(.*?)(?=div\([^\)]+\)\.\s*)/m ) do |blk|
|
407
|
+
block_class = (@current_class == $1) ? nil : %{ class=#{$1.inspect}}
|
408
|
+
@current_class = $1
|
409
|
+
BLOCK_GROUP_SPLITTER + ( ($2.strip.empty? || block_class.nil?) ? $2 : textile_p('div', block_class, nil, "\n\n#{$2.strip}\n\n") )
|
410
|
+
end
|
411
|
+
|
412
|
+
# Take care of the very last div
|
413
|
+
text.sub!( /div\((.*?)\)\.\s*(.*)/m ) do |blk|
|
414
|
+
block_class = (@current_class == $1) ? nil : %{ class=#{$1.inspect}}
|
415
|
+
@current_class = $1
|
416
|
+
BLOCK_GROUP_SPLITTER + ( ($2.strip.empty? || block_class.nil?) ? $2 : textile_p('div', block_class, nil, "\n\n#{$2.strip}\n\n") )
|
417
|
+
end
|
418
|
+
|
419
|
+
# Handle the text now that the placeholders for divs are set, splitting at BLOCK_GROUP_SPLITTER
|
420
|
+
text.replace(text.strip.split(BLOCK_GROUP_SPLITTER.strip).map do |chunk|
|
421
|
+
block_groups(chunk, deep_code)
|
422
|
+
end.join)
|
423
|
+
end
|
424
|
+
|
425
|
+
def block_groups( text, deep_code = false )
|
426
|
+
text.replace text.split( BLOCKS_GROUP_RE ).collect { |blk| blk(blk, deep_code) }.join("\n")
|
427
|
+
end
|
428
|
+
|
429
|
+
# Surrounds blocks with paragraphs and shelves them when necessary
|
430
|
+
def blk( text, deep_code = false )
|
431
|
+
return text if text =~ /<[0-9]+>/
|
432
|
+
|
433
|
+
plain = text !~ /\A[#*> ]/
|
434
|
+
|
435
|
+
# skip blocks that are complex HTML
|
436
|
+
if text =~ /^<\/?(\w+).*>/ and not SIMPLE_HTML_TAGS.include? $1
|
437
|
+
text
|
438
|
+
else
|
439
|
+
# search for indentation levels
|
440
|
+
text.strip!
|
441
|
+
if text.empty?
|
442
|
+
text
|
443
|
+
else
|
444
|
+
code_blk = nil
|
445
|
+
text.gsub!( /((?:\n(?:\n^ +[^\n]*)+)+)/m ) do |iblk|
|
446
|
+
flush_left iblk
|
447
|
+
blocks iblk, plain
|
448
|
+
iblk.gsub( /^(\S)/, "\\1" )
|
449
|
+
if plain
|
450
|
+
code_blk = iblk; ""
|
451
|
+
else
|
452
|
+
iblk
|
453
|
+
end
|
454
|
+
end
|
455
|
+
block_applied = 0
|
456
|
+
@rules.each do |rule_name|
|
457
|
+
block_applied += 1 if ( rule_name.to_s.match /^block_/ and method( rule_name ).call( text ) )
|
458
|
+
end
|
459
|
+
if block_applied.zero?
|
460
|
+
if deep_code
|
461
|
+
text = "\t<pre><code>#{ text }</code></pre>\n"
|
462
|
+
else
|
463
|
+
text = "\t<p>#{ text }</p>\n"
|
464
|
+
end
|
465
|
+
end
|
466
|
+
# hard_break text
|
467
|
+
text << "\n#{ code_blk }"
|
468
|
+
end
|
469
|
+
return text
|
470
|
+
end
|
471
|
+
|
472
|
+
end
|
473
|
+
|
474
|
+
def refs( text )
|
475
|
+
@rules.each do |rule_name|
|
476
|
+
method( rule_name ).call( text ) if rule_name.to_s.match /^refs_/
|
477
|
+
end
|
478
|
+
end
|
479
|
+
|
480
|
+
def check_refs( text )
|
481
|
+
ret = @urlrefs[text.downcase] if text
|
482
|
+
ret || [text, nil]
|
483
|
+
end
|
484
|
+
|
485
|
+
# Puts text in storage and returns its placeholder
|
486
|
+
# e.g. shelve("some text") => <1>
|
487
|
+
def shelve( val )
|
488
|
+
@shelf << val
|
489
|
+
" <#{ @shelf.length }>"
|
490
|
+
end
|
491
|
+
|
492
|
+
# Retrieves text from storage using its placeholder
|
493
|
+
# e.g. retrieve("<1>") => "some text"
|
494
|
+
def retrieve( text )
|
495
|
+
@shelf.each_with_index do |r, i|
|
496
|
+
text.gsub!( " <#{ i + 1 }>" ){|m| r }
|
497
|
+
end
|
498
|
+
end
|
499
|
+
|
500
|
+
def incoming_entities( text )
|
501
|
+
## turn any incoming ampersands into a dummy character for now.
|
502
|
+
## This uses a negative lookahead for alphanumerics followed by a semicolon,
|
503
|
+
## implying an incoming html entity, to be skipped
|
504
|
+
|
505
|
+
text.gsub!( /&(?![#a-z0-9]+;)/i, "x%x%" )
|
506
|
+
end
|
507
|
+
|
508
|
+
def clean_white_space( text )
|
509
|
+
# normalize line breaks
|
510
|
+
text.gsub!( /\r\n/, "\n" )
|
511
|
+
text.gsub!( /\r/, "\n" )
|
512
|
+
text.gsub!( /\t/, ' ' )
|
513
|
+
text.gsub!( /^ +$/, '' )
|
514
|
+
text.gsub!( /\n{3,}/, "\n\n" )
|
515
|
+
text.gsub!( /"$/, "\" " )
|
516
|
+
|
517
|
+
# if entire document is indented, flush
|
518
|
+
# to the left side
|
519
|
+
flush_left text
|
520
|
+
end
|
521
|
+
|
522
|
+
def flush_left( text )
|
523
|
+
indt = 0
|
524
|
+
if text =~ /^ /
|
525
|
+
while text !~ /^ {#{indt}}\S/
|
526
|
+
indt += 1
|
527
|
+
end unless text.empty?
|
528
|
+
if indt.nonzero?
|
529
|
+
text.gsub!( /^ {#{indt}}/, '' )
|
530
|
+
end
|
531
|
+
end
|
532
|
+
end
|
533
|
+
|
534
|
+
def footnote_ref( text )
|
535
|
+
text.gsub!( /\b\[([0-9]+?)\](\s)?/,
|
536
|
+
'<sup><a href="#fn\1">\1</a></sup>\2' )
|
537
|
+
end
|
538
|
+
|
539
|
+
def rip_offtags( text )
|
540
|
+
if text =~ /<.*>/
|
541
|
+
## strip and encode <pre> content
|
542
|
+
codepre, used_offtags = 0, {}
|
543
|
+
text.gsub!( OFFTAG_MATCH ) do |line|
|
544
|
+
if $3
|
545
|
+
offtag, aftertag = $4, $5
|
546
|
+
codepre += 1
|
547
|
+
used_offtags[offtag] = true
|
548
|
+
if codepre - used_offtags.length > 0
|
549
|
+
htmlesc( line, :NoQuotes ) unless used_offtags['notextile']
|
550
|
+
@pre_list.last << line
|
551
|
+
line = ""
|
552
|
+
else
|
553
|
+
htmlesc( aftertag, :NoQuotes ) if aftertag and not used_offtags['notextile']
|
554
|
+
line = "<redpre##{ @pre_list.length }>"
|
555
|
+
@pre_list << "#{ $3 }#{ aftertag }"
|
556
|
+
end
|
557
|
+
elsif $1 and codepre > 0
|
558
|
+
if codepre - used_offtags.length > 0
|
559
|
+
htmlesc( line, :NoQuotes ) unless used_offtags['notextile']
|
560
|
+
@pre_list.last << line
|
561
|
+
line = ""
|
562
|
+
end
|
563
|
+
codepre -= 1 unless codepre.zero?
|
564
|
+
used_offtags = {} if codepre.zero?
|
565
|
+
end
|
566
|
+
line
|
567
|
+
end
|
568
|
+
end
|
569
|
+
text
|
570
|
+
end
|
571
|
+
|
572
|
+
def smooth_offtags( text )
|
573
|
+
unless @pre_list.empty?
|
574
|
+
## replace <pre> content
|
575
|
+
text.gsub!( /<redpre#(\d+)>/ ) { @pre_list[$1.to_i] }
|
576
|
+
end
|
577
|
+
end
|
578
|
+
|
579
|
+
def inline( text )
|
580
|
+
[/^inline_/, /^glyphs_/].each do |meth_re|
|
581
|
+
@rules.each do |rule_name|
|
582
|
+
method( rule_name ).call( text ) if rule_name.to_s.match( meth_re )
|
583
|
+
end
|
584
|
+
end
|
585
|
+
end
|
586
|
+
|
587
|
+
def h_align( text )
|
588
|
+
H_ALGN_VALS[text]
|
589
|
+
end
|
590
|
+
|
591
|
+
def v_align( text )
|
592
|
+
V_ALGN_VALS[text]
|
593
|
+
end
|
594
|
+
|
595
|
+
# HTML cleansing stuff
|
596
|
+
BASIC_TAGS = {
|
597
|
+
'a' => ['href', 'title'],
|
598
|
+
'img' => ['src', 'alt', 'title'],
|
599
|
+
'br' => [],
|
600
|
+
'i' => nil,
|
601
|
+
'u' => nil,
|
602
|
+
'b' => nil,
|
603
|
+
'pre' => nil,
|
604
|
+
'kbd' => nil,
|
605
|
+
'code' => ['lang'],
|
606
|
+
'cite' => nil,
|
607
|
+
'strong' => nil,
|
608
|
+
'em' => nil,
|
609
|
+
'ins' => nil,
|
610
|
+
'sup' => nil,
|
611
|
+
'sub' => nil,
|
612
|
+
'del' => nil,
|
613
|
+
'table' => nil,
|
614
|
+
'tr' => nil,
|
615
|
+
'td' => ['colspan', 'rowspan'],
|
616
|
+
'th' => nil,
|
617
|
+
'ol' => ['start'],
|
618
|
+
'ul' => nil,
|
619
|
+
'li' => nil,
|
620
|
+
'p' => nil,
|
621
|
+
'h1' => nil,
|
622
|
+
'h2' => nil,
|
623
|
+
'h3' => nil,
|
624
|
+
'h4' => nil,
|
625
|
+
'h5' => nil,
|
626
|
+
'h6' => nil,
|
627
|
+
'blockquote' => ['cite']
|
628
|
+
}
|
629
|
+
|
630
|
+
def clean_html( text, tags = BASIC_TAGS )
|
631
|
+
text.gsub!( /<!\[CDATA\[/, '' )
|
632
|
+
text.gsub!( /<(\/*)(\w+)([^>]*)>/ ) do
|
633
|
+
raw = $~
|
634
|
+
tag = raw[2].downcase
|
635
|
+
if tags.has_key? tag
|
636
|
+
pcs = [tag]
|
637
|
+
tags[tag].each do |prop|
|
638
|
+
['"', "'", ''].each do |q|
|
639
|
+
q2 = ( q != '' ? q : '\s' )
|
640
|
+
if raw[3] =~ /#{prop}\s*=\s*#{q}([^#{q2}]+)#{q}/i
|
641
|
+
attrv = $1
|
642
|
+
next if (prop == 'src' or prop == 'href') and not attrv =~ %r{^(http|https|ftp):}
|
643
|
+
pcs << "#{prop}=\"#{attrv.gsub('"', '\\"')}\""
|
644
|
+
break
|
645
|
+
end
|
646
|
+
end
|
647
|
+
end if tags[tag]
|
648
|
+
"<#{raw[1]}#{pcs.join " "}>"
|
649
|
+
else
|
650
|
+
" "
|
651
|
+
end
|
652
|
+
end
|
653
|
+
end
|
654
|
+
|
655
|
+
AUTO_LINK_RE = /
|
656
|
+
( # leading text
|
657
|
+
<\w+.*?>| # leading HTML tag, or
|
658
|
+
[^=!:'"\/]| # leading punctuation, or
|
659
|
+
^ # beginning of line
|
660
|
+
)
|
661
|
+
(
|
662
|
+
(?:http[s]?:\/\/)| # protocol spec, or
|
663
|
+
(?:www\.) # www.*
|
664
|
+
)
|
665
|
+
(
|
666
|
+
([\w]+[=?&:%\/\.\~\-]*)* # url segment
|
667
|
+
\w+[\/]? # url tail
|
668
|
+
(?:\#\w*)? # trailing anchor
|
669
|
+
)
|
670
|
+
([[:punct:]]|\s|<|$) # trailing text
|
671
|
+
/x
|
672
|
+
|
673
|
+
end
|
674
|
+
|