wikiwah 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
# Register the gem-helper tasks that Bundler provides for this project.
require 'bundler'
Bundler::GemHelper.install_tasks

# Test task: runs every file matching test/**/*_tests.rb with "lib" added
# to the load path.
require 'rake/testtask'
Rake::TestTask.new do |test_task|
  test_task.libs.push("lib")
  test_task.test_files = FileList['test/**/*_tests.rb']
end

# Running rake with no arguments runs the tests.
task :default => [:test]
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'cgi' # for escapeHTML
4
+
5
class WikiWah

  # Flow deals with block-level formatting in WikiWah.  Input text is split
  # into paragraphs, separated by blank lines.  A list-item bullet also
  # implies a new paragraph.
  #
  # Flow keeps track of the current level of indentation, and emits
  # block-start and block-end tags (e.g. "<ul>", "</ul>") as required.
  #
  # Flow recognises the following types of blocks:
  #
  # - A line prefixed by "=" is a heading.  The heading-level is implied by
  #   the number of "=" characters.
  #
  # - A line beginning with "*" or "-" is an unordered list item.
  #
  # - A line beginning with "1.", "(1)" or "#" is an ordered list item.
  #
  # - A line beginning with "%" is a definition-list term; indented body
  #   text directly inside it becomes the definition.
  #
  # - A paragraph prefixed by "|" is preformatted text (e.g. code)
  #
  # - A paragraph prefixed by ">" is a blockquote (ie. a citation)
  #
  # - Anything else is plain old body text.
  #
  class Flow

    # Patterns that start a new block
    BlankRegexp = /\A *$/
    BulletRegexp = Regexp.new('\A *([\*\-\#]|\d+\.|\(\d+\)) ')

    # An open container element: its tag name and the indentation level
    # (number of leading spaces) at which it was opened.
    Context = Struct.new(:tag, :level)

    # Sentinel sitting at the bottom of the context stack; never popped.
    TopContext = Context.new(:top, -1)

    # Convert +input+ text to HTML.
    #
    # +input+ may be a String (or anything else responding to +each_line+),
    # or a collection of lines responding to +each+.
    #
    # An optional +filter+ block may be provided, in which case it's
    # applied to the body of each block.
    #
    # Returns the generated HTML as a String.
    def self.convert(input, &filter)
      buff = ''.dup
      new(buff, filter).process(input)
      buff
    end

    # +out+ receives the generated HTML via <tt><<</tt>.  +text_filter+, if
    # given, must respond to +call+ and is applied to block text before it
    # is written.
    def initialize(out, text_filter=nil)
      @out = out
      @text_filter = text_filter
      @context_stack = [TopContext]
      @block_buffer = nil
    end

    # Process a multi-line input string, then close every open container.
    def process(input)
      add_input(input)
      flush_context_stack
    end

    private

    # Process multi-line input: blank lines and bullets terminate the
    # block being accumulated; every non-blank line is buffered.
    def add_input(input)
      # String#each no longer exists (Ruby 1.9+), so prefer each_line and
      # fall back to each for inputs that are already a list of lines.
      lines = input.respond_to?(:each_line) ? input.each_line : input.each
      lines.each do |line|
        if line =~ BlankRegexp
          start_new_block
        else
          start_new_block if line =~ BulletRegexp
          append_to_block(line)
        end
      end
      start_new_block
    end

    # Append a line to the block
    def append_to_block(line)
      @block_buffer = (@block_buffer || '') + line
    end

    # Flush the buffered block (if any) to the output.
    def start_new_block
      return unless @block_buffer
      add_block(@block_buffer)
      @block_buffer = nil
    end

    # Classify a complete block by its prefix and emit the matching markup.
    # The regexp captures are read before any helper is called.
    def add_block(block)
      case block
      when /\A( *)- /                       # unordered list item
        write_item('ul', $1.size, 'li', $')
      when /\A( *)\* /                      # unordered list item (sparse)
        write_item('ul class="sparse"', $1.size, 'li', $')
      when /\A( *)(\#|\d+\.|\(\d+\)) /      # ordered list item
        write_item('ol', $1.size, 'li', $')
      when /\A( *)% /                       # definition-list term
        write_item('dl', $1.size, 'dt', $')
      when /\A(( *)> )/                     # citation
        prefix, indent = $1, $2
        push_context('blockquote', indent.size)
        write_text(strip_prefix(prefix, block))
      when /\A(( *)\| )/                    # preformatted (explicit)
        prefix, indent = $1, $2
        push_context('pre', indent.size)
        write_html(CGI.escapeHTML(strip_prefix(prefix, block)))
      when /\A( *)(=+) /                    # heading
        marker, content = $2, $'
        flush_context_stack
        write_tag(content, "h#{marker.size}")
      when /\A( *)/                         # body text
        indent = $1
        tag =
          if indent == ""
            'p'
          elsif context.tag == 'dl'
            'dd'
          else
            'blockquote'
          end
        push_context(tag, indent.size, true)
        write_text(strip_prefix(indent, block))
      end
    end

    # Open (or reuse) +container+ at +level+ and write +content+ inside a
    # balanced +tag+ element.
    def write_item(container, level, tag, content)
      push_context(container, level)
      write_tag(content, tag)
    end

    # Remove +prefix+ from the start of every line of +text+.  A trailing
    # space in the prefix is optional at end-of-line, so a bare ">" or "|"
    # line is stripped too.
    def strip_prefix(prefix, text)
      pattern = '^' + Regexp.quote(prefix)
      pattern.sub!(/\\ $/, '( |$)')
      text.gsub(Regexp.new(pattern), '')
    end

    # Write a balanced tag
    def write_tag(content, tag)
      write_html("<#{tag}>\n")
      write_text(content)
      write_html("</#{tag}>\n")
    end

    # Write HTML markup
    def write_html(html)
      @out << html
    end

    # Write text content, applying the text filter when one was supplied.
    def write_text(text)
      text = @text_filter.call(text) if @text_filter
      @out << text
    end

    # Get the current Context
    def context
      @context_stack.last
    end

    # Make sure a container for +tag_with_arguments+ is open at +level+.
    #
    # +tag_with_arguments+ is a tag name optionally followed by attributes
    # (e.g. 'ul class="sparse"').  Deeper contexts are closed first.  A
    # container with the same tag at the same level is reused, unless
    # +separate_same+ is set, in which case it is closed and re-opened.
    def push_context(tag_with_arguments, level, separate_same=false)
      match = %r{^(\w+)(.*)$}.match(tag_with_arguments)
      tag = match[1]
      arguments = match[2]
      pop_context_to_level(level)
      if context.level == level
        if context.tag != tag
          pop_context
        elsif separate_same
          write_html("</#{tag}>\n")
          write_html("<#{tag}#{arguments}>\n")
        end
      end
      if context.level < level
        write_html("<#{tag}#{arguments}>\n")
        @context_stack << Context.new(tag, level)
      end
    end

    # Pop topmost Context from the stack, writing its closing tag.
    # Raises RuntimeError if only the top sentinel is left.
    def pop_context
      raise "can't pop at top" if context == TopContext
      cxt = @context_stack.pop
      write_html("</#{cxt.tag}>\n")
    end

    # Close every Context opened at a level deeper than +level+.
    def pop_context_to_level(level)
      pop_context while context.level > level
    end

    # Pop all Contexts from the stack
    def flush_context_stack
      pop_context while context != TopContext
    end

  end

end
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'cgi' # for escapeHTML
4
+
5
class WikiWah

  # Flow deals with block-level formatting in WikiWah.  Input text is split
  # into paragraphs, separated by blank lines.  A list-item bullet also
  # implies a new paragraph.
  #
  # Flow keeps track of the current level of indentation, and emits
  # block-start and block-end tags (e.g. "<ul>", "</ul>") as required.
  #
  # Flow recognises the following types of blocks:
  #
  # - A line prefixed by "=" is a heading.  The heading-level is implied by
  #   the number of "=" characters.
  #
  # - A line beginning with "*" or "-" is an unordered list item.
  #
  # - A line beginning with "1.", "(1)" or "#" is an ordered list item.
  #
  # - A line beginning with "%" is a definition-list term; indented body
  #   text directly inside it becomes the definition.
  #
  # - A paragraph prefixed by "|" is preformatted text (e.g. code)
  #
  # - A paragraph prefixed by ">" is a blockquote (ie. a citation)
  #
  # - Anything else is plain old body text.
  #
  class Flow

    # Patterns that start a new block
    BlankRegexp = /\A *$/
    BulletRegexp = Regexp.new('\A *([\*\-\#]|\d+\.|\(\d+\)) ')

    # An open container element: its tag name and the indentation level
    # (number of leading spaces) at which it was opened.
    Context = Struct.new(:tag, :level)

    # Sentinel sitting at the bottom of the context stack; never popped.
    TopContext = Context.new(:top, -1)

    # Convert +input+ text to HTML.
    #
    # +input+ may be a String (or anything else responding to +each_line+),
    # or a collection of lines responding to +each+.
    #
    # An optional +filter+ block may be provided, in which case it's
    # applied to the body of each block.
    #
    # Returns the generated HTML as a String.
    def self.convert(input, &filter)
      buff = ''.dup
      new(buff, filter).process(input)
      buff
    end

    # +out+ receives the generated HTML via <tt><<</tt>.  +text_filter+, if
    # given, must respond to +call+ and is applied to block text before it
    # is written.
    def initialize(out, text_filter=nil)
      @out = out
      @text_filter = text_filter
      @context_stack = [TopContext]
      @block_buffer = nil
    end

    # Process a multi-line input string, then close every open container.
    def process(input)
      add_input(input)
      flush_context_stack
    end

    private

    # Process multi-line input: blank lines and bullets terminate the
    # block being accumulated; every non-blank line is buffered.
    def add_input(input)
      # String#each no longer exists (Ruby 1.9+), so prefer each_line and
      # fall back to each for inputs that are already a list of lines.
      lines = input.respond_to?(:each_line) ? input.each_line : input.each
      lines.each do |line|
        if line =~ BlankRegexp
          start_new_block
        else
          start_new_block if line =~ BulletRegexp
          append_to_block(line)
        end
      end
      start_new_block
    end

    # Append a line to the block
    def append_to_block(line)
      @block_buffer = (@block_buffer || '') + line
    end

    # Flush the buffered block (if any) to the output.
    def start_new_block
      return unless @block_buffer
      add_block(@block_buffer)
      @block_buffer = nil
    end

    # Classify a complete block by its prefix and emit the matching markup.
    # The regexp captures are read before any helper is called.
    def add_block(block)
      case block
      when /\A( *)- /                       # unordered list item
        write_item('ul', $1.size, 'li', $')
      when /\A( *)\* /                      # unordered list item (sparse)
        write_item('ul class="sparse"', $1.size, 'li', $')
      when /\A( *)(\#|\d+\.|\(\d+\)) /      # ordered list item
        write_item('ol', $1.size, 'li', $')
      when /\A( *)% /                       # definition-list term
        write_item('dl', $1.size, 'dt', $')
      when /\A(( *)> )/                     # citation
        prefix, indent = $1, $2
        push_context('blockquote', indent.size)
        write_text(strip_prefix(prefix, block))
      when /\A(( *)\| )/                    # preformatted (explicit)
        prefix, indent = $1, $2
        push_context('pre', indent.size)
        write_html(CGI.escapeHTML(strip_prefix(prefix, block)))
      when /\A( *)(=+) /                    # heading
        marker, content = $2, $'
        flush_context_stack
        write_tag(content, "h#{marker.size}")
      when /\A( *)/                         # body text
        indent = $1
        tag =
          if indent == ""
            'p'
          elsif context.tag == 'dl'
            'dd'
          else
            'blockquote'
          end
        push_context(tag, indent.size, true)
        write_text(strip_prefix(indent, block))
      end
    end

    # Open (or reuse) +container+ at +level+ and write +content+ inside a
    # balanced +tag+ element.
    def write_item(container, level, tag, content)
      push_context(container, level)
      write_tag(content, tag)
    end

    # Remove +prefix+ from the start of every line of +text+.  A trailing
    # space in the prefix is optional at end-of-line, so a bare ">" or "|"
    # line is stripped too.
    def strip_prefix(prefix, text)
      pattern = '^' + Regexp.quote(prefix)
      pattern.sub!(/\\ $/, '( |$)')
      text.gsub(Regexp.new(pattern), '')
    end

    # Write a balanced tag
    def write_tag(content, tag)
      write_html("<#{tag}>\n")
      write_text(content)
      write_html("</#{tag}>\n")
    end

    # Write HTML markup
    def write_html(html)
      @out << html
    end

    # Write text content, applying the text filter when one was supplied.
    def write_text(text)
      text = @text_filter.call(text) if @text_filter
      @out << text
    end

    # Get the current Context
    def context
      @context_stack.last
    end

    # Make sure a container for +tag_with_arguments+ is open at +level+.
    #
    # +tag_with_arguments+ is a tag name optionally followed by attributes
    # (e.g. 'ul class="sparse"').  Deeper contexts are closed first.  A
    # container with the same tag at the same level is reused, unless
    # +separate_same+ is set, in which case it is closed and re-opened.
    def push_context(tag_with_arguments, level, separate_same=false)
      match = %r{^(\w+)(.*)$}.match(tag_with_arguments)
      tag = match[1]
      arguments = match[2]
      pop_context_to_level(level)
      if context.level == level
        if context.tag != tag
          pop_context
        elsif separate_same
          write_html("</#{tag}>\n")
          write_html("<#{tag}#{arguments}>\n")
        end
      end
      if context.level < level
        write_html("<#{tag}#{arguments}>\n")
        @context_stack << Context.new(tag, level)
      end
    end

    # Pop topmost Context from the stack, writing its closing tag.
    # Raises RuntimeError if only the top sentinel is left.
    def pop_context
      raise "can't pop at top" if context == TopContext
      cxt = @context_stack.pop
      write_html("</#{cxt.tag}>\n")
    end

    # Close every Context opened at a level deeper than +level+.
    def pop_context_to_level(level)
      pop_context while context.level > level
    end

    # Pop all Contexts from the stack
    def flush_context_stack
      pop_context while context != TopContext
    end

  end

end
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
class WikiWah

  # Subst performs text transformation as an ordered series of
  # regular-expression substitutions.  Each registered pattern has an
  # associated block; when the pattern matches, the block is called with
  # the MatchData and must return the replacement string.
  #
  # Unlike a plain chain of gsub calls, a replacement produced by one
  # pattern is shielded from the patterns that run after it: it is parked
  # behind a numbered placeholder until all patterns have been applied.
  class Subst

    def initialize
      @transforms = []
    end

    # Register +regexp+ together with the block that computes its
    # replacement.  Patterns are applied in registration order.
    def add_transformation(regexp, &proc)
      @transforms.push([regexp, proc])
    end

    # Return a transformed copy of +s+; +s+ itself is left untouched.
    def transform(s)
      result = s.dup
      replacements = []
      @transforms.each do |pattern, handler|
        result.gsub!(pattern) do
          # Park the replacement and leave a "\001<index>\002" marker.
          replacements.push(handler.call(Regexp.last_match))
          "\001#{replacements.length - 1}\002"
        end
      end
      # Swap every marker for the replacement it stands for.
      result.gsub!(/\001(\d+)\002/) do
        replacements[Regexp.last_match(1).to_i]
      end
      result
    end

  end

end
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
class WikiWah

  # Subst performs text transformation as an ordered series of
  # regular-expression substitutions.  Each registered pattern has an
  # associated block; when the pattern matches, the block is called with
  # the MatchData and must return the replacement string.
  #
  # Unlike a plain chain of gsub calls, a replacement produced by one
  # pattern is shielded from the patterns that run after it: it is parked
  # behind a numbered placeholder until all patterns have been applied.
  class Subst

    def initialize
      @transforms = []
    end

    # Register +regexp+ together with the block that computes its
    # replacement.  Patterns are applied in registration order.
    def add_transformation(regexp, &proc)
      @transforms.push([regexp, proc])
    end

    # Return a transformed copy of +s+; +s+ itself is left untouched.
    def transform(s)
      result = s.dup
      replacements = []
      @transforms.each do |pattern, handler|
        result.gsub!(pattern) do
          # Park the replacement and leave a "\001<index>\002" marker.
          replacements.push(handler.call(Regexp.last_match))
          "\001#{replacements.length - 1}\002"
        end
      end
      # Swap every marker for the replacement it stands for.
      result.gsub!(/\001(\d+)\002/) do
        replacements[Regexp.last_match(1).to_i]
      end
      result
    end

  end

end
@@ -0,0 +1,3 @@
1
class WikiWah
  # Version string of the WikiWah library.  Frozen so the shared constant
  # cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,3 @@
1
# The library's class is spelled WikiWah (capital second W); the version is
# defined there so WikiWah::VERSION resolves.
class WikiWah
  # Version string of the library.  The guard avoids an "already
  # initialized constant" warning if VERSION was defined by another file.
  VERSION = "0.0.1".freeze unless const_defined?(:VERSION, false)
end

# Backward-compatible alias for the original, differently-capitalised name.
Wikiwah = WikiWah unless defined?(Wikiwah)
data/lib/wikiwah.rb ADDED
@@ -0,0 +1,103 @@
1
+ require 'wikiwah/flow'
2
+ require 'wikiwah/subst'
3
+ require 'wikiwah/version'
4
+
5
+ # A formatter for turning Wiki-esque text into HTML.
6
+ #
7
+ # = Block-level markup
8
+ #
9
+ # - A line prefixed by "=" is a heading. The heading-level is implied by
10
+ # the number of "=" characters.
11
+ #
12
+ # - A line beginning with "*" or "-" is an unordered list item.
13
+ #
14
+ # - A line beginning with "1.", "(1)" or "#" is an ordered list item.
15
+ #
16
+ # - A paragraph prefixed by "|" is preformatted text (e.g. code)
17
+ #
18
+ # - A paragraph prefixed by ">" is a blockquote (ie. a citation)
19
+ #
20
+ # = Text markup
21
+ #
22
+ # - HTML tags are rendered verbatim.
23
+ #
24
+ # - Text may be marked *bold*, /italic/, _underlined_, +monospace+
25
+ #
26
+ # - Text may be quoted by wrapping it in pairs of double-quotes (""like this""), or one character at a time with a backslash
27
+ #
28
+ # - URLs turn into links.
29
+ #
30
+ # - "{LOCATION}" creates a link to LOCATION.
31
+ #
32
+ # - "{LABEL}@LOCATION" creates a link to LOCATION, with the specified
33
+ # LABEL.
34
+ #
35
class WikiWah

  # HTML element used for each inline markup delimiter.
  INLINE_TAGS = {
    '*' => 'strong',
    '+' => 'tt',
    '/' => 'em',
    '_' => 'u'
  }.freeze

  # Callable invoked with each link target; its result is used as the href.
  # When it returns nil or false the link is left as literal "{...}" text.
  # Defaults to the identity function.
  attr_writer :link_translator

  def initialize
    @link_translator = proc { |link| link }
    init_transformer
  end

  # Convert WikiWah text to HTML.
  #
  # Block structure is handled by Flow; the text of each block is passed
  # through the inline transformer.
  def to_html(text)
    Flow.convert(text) do |paragraph|
      @transformer.transform(paragraph)
    end
  end

  # Convenience form: convert +text+ with a default-configured formatter.
  def self.to_html(text)
    new.to_html(text)
  end

  private

  # Map a link target through the configured link translator.
  def translate_link(link)
    @link_translator.call(link)
  end

  # Build the inline-markup transformer.  Patterns are applied in the order
  # they are added.
  def init_transformer
    @transformer = WikiWah::Subst.new
    # Non-greedy, so that two quoted spans on the same line are treated as
    # two spans rather than one span swallowing the text between them.
    @transformer.add_transformation(/""(.+?)""/) do |match|
      # Double-double-quoted: literal text, HTML-escaped
      CGI.escapeHTML(match[1])
    end
    @transformer.add_transformation(/\\(.)/) do |match|
      # Backslash-quoted: the next character, taken literally
      match[1]
    end
    @transformer.add_transformation(/\<(.+?)\>/m) do |match|
      # In-line HTML: passed through verbatim
      match[0]
    end
    @transformer.add_transformation(/\{(.+?)\}(@(\S*[\w\/]))?/m) do |match|
      # Distinguished link: "{LABEL}" or "{LABEL}@LOCATION"
      label = @transformer.transform(match[1])
      location = translate_link(match[3] || match[1])
      if location
        "<a href='#{location}'>#{label}</a>"
      else
        "{#{label}}"
      end
    end
    @transformer.add_transformation(/\b[a-z]+:[\w\/]\S*[\w\/]/) do |match|
      # URL
      "<a href='#{match[0]}'>#{match[0]}</a>"
    end
    @transformer.add_transformation(%r[(^|\W)([*+_/])([*+_/]*\w.*?\w[*+_/]*)\2(?!\w)]) do |match|
      # Bold/italic/etc.  match[1] is the character before the opening
      # delimiter and is preserved in front of the element.
      tag = INLINE_TAGS[match[2]]
      content = @transformer.transform(match[3])
      "#{match[1]}<#{tag}>#{content}</#{tag}>"
    end
  end

end
103
+