wikiwah 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
require 'bundler'

# Register the rake tasks supplied by Bundler's gem helper.
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# Test task: puts "lib" on the load path and runs every file matching
# test/**/*_tests.rb.
Rake::TestTask.new { |test_task|
  test_task.libs.push("lib")
  test_task.test_files = FileList['test/**/*_tests.rb']
}

# A bare `rake` runs the tests.
task default: :test
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'cgi' # for escapeHTML
4
+
5
class WikiWah

  # Flow deals with block-level formatting in WikiWah.  Input text is split
  # into blocks (paragraphs), separated by blank lines.  A list-item bullet
  # also starts a new block.
  #
  # Flow keeps a stack of open contexts, keyed by indentation level, and
  # emits block-start and block-end tags (e.g. "<ul>", "</ul>") as the
  # indentation and block type change.
  #
  # Flow recognises the following types of blocks:
  #
  # - "= ", "== ", ... is a heading.  The heading level is the number of
  #   "=" characters.
  #
  # - "- " is an unordered list item; "* " is an item of an unordered list
  #   rendered as <ul class="sparse">.
  #
  # - "1. ", "(1) " or "# " is an ordered list item.
  #
  # - "% " is a definition-list term (<dt> inside <dl>).  Indented body
  #   text whose current context is a definition list becomes <dd>.
  #
  # - "| " is preformatted text (e.g. code); its content is HTML-escaped.
  #
  # - "> " is a blockquote (ie. a citation).
  #
  # - Anything else is body text: <p> when unindented, otherwise
  #   <blockquote> (or <dd>, see above).
  #
  class Flow

    # Pattern matching a blank line (spaces only).
    BlankRegexp = /\A *$/

    # Pattern matching a list bullet, which starts a new block even without
    # a preceding blank line.
    BulletRegexp = Regexp.new('\A *([\*\-\#]|\d+\.|\(\d+\)) ')

    # An open block: the tag that was emitted and its indentation level.
    Context = Struct.new('Context', :tag, :level)

    # Sentinel at the bottom of the context stack; it is never popped.
    TopContext = Context.new(:top, -1)

    # Convert +input+ text to HTML and return the HTML string.
    #
    # +input+ may be a String (processed line by line) or any object
    # yielding lines from +each+.
    #
    # An optional +filter+ block may be provided, in which case it is
    # applied to the text body of each block (but not to preformatted text
    # or to the generated tags).
    def self.convert(input, &filter)
      buff = ''.dup  # unfrozen even when string literals are frozen
      Flow.new(buff, filter).process(input)
      buff
    end

    # +out+ receives the generated HTML via <<.  +text_filter+, when given,
    # must respond to +call+ and is applied to block text before output.
    def initialize(out, text_filter=nil)
      @out = out
      @text_filter = text_filter
      @context_stack = [TopContext]
      @block_buffer = nil
    end

    # Process a multi-line input string, then close every open context.
    def process(input)
      add_input(input)
      flush_context_stack
    end

    private

    # Split the input into blocks and hand each one to add_block.
    def add_input(input)
      # String#each no longer exists (removed in Ruby 1.9); iterate lines
      # explicitly, but keep accepting plain collections of lines.
      lines = input.respond_to?(:each_line) ? input.each_line : input.each
      lines.each do |line|
        blank = (line =~ BlankRegexp)
        start_new_block if blank || line =~ BulletRegexp
        append_to_block(line) unless blank
      end
      start_new_block
    end

    # Append a line to the block being accumulated
    def append_to_block(line)
      @block_buffer = (@block_buffer || '') + line
    end

    # Flush the buffered block, if there is one
    def start_new_block
      if @block_buffer
        add_block(@block_buffer)
        @block_buffer = nil
      end
    end

    # Emit the HTML for one block, chosen by the block's leading marker.
    #
    # The match variables ($1, $2, $') set by the +when+ clause stay valid
    # across the calls below: they are local to this method, so regexps
    # used inside push_context do not overwrite them.
    def add_block(block)
      case block
      when /\A( *)- /                          # unordered list item
        push_context('ul', $1.size)
        write_tag($', 'li')
      when /\A( *)\* /                         # "sparse" unordered list item
        push_context('ul class="sparse"', $1.size)
        write_tag($', 'li')
      when /\A( *)(\#|\d+\.|\(\d+\)) /         # ordered list item
        push_context('ol', $1.size)
        write_tag($', 'li')
      when /\A( *)% /                          # definition-list term
        push_context('dl', $1.size)
        write_tag($', 'dt')
      when /\A(( *)> )/                        # citation
        push_context('blockquote', $2.size)
        write_text(strip_prefix($1, block))
      when /\A(( *)\| )/                       # preformatted (explicit)
        push_context('pre', $2.size)
        write_html(CGI.escapeHTML(strip_prefix($1, block)))
      when /\A( *)(=+) /                       # heading
        flush_context_stack
        write_tag($', "h#{$2.size}")
      when /\A( *)/                            # body text
        tag =
          if $1 == ""
            'p'
          elsif context.tag == 'dl'
            'dd'
          else
            'blockquote'
          end
        # Consecutive body blocks at the same level are separate elements.
        push_context(tag, $1.size, true)
        write_text(strip_prefix($1, block))
      end
    end

    # Remove +prefix+ from the start of every line of +text+.  The final
    # space of the prefix may instead be the end of the line, so a line
    # consisting of the marker alone is stripped as well.
    def strip_prefix(prefix, text)
      pattern = '^' + Regexp.quote(prefix)
      pattern.sub!(/\\ $/, '( |$)')
      text.gsub(Regexp.new(pattern), '')
    end

    # Write +content+ wrapped in a balanced +tag+
    def write_tag(content, tag)
      write_html("<#{tag}>\n")
      write_text(content)
      write_html("</#{tag}>\n")
    end

    # Write HTML markup verbatim
    def write_html(html)
      @out << html
    end

    # Write text content, passing it through the text filter if one is set
    def write_text(text)
      text = @text_filter.call(text) if @text_filter
      @out << text
    end

    # Get the current (innermost) Context
    def context
      @context_stack.last
    end

    # Make +tag_with_arguments+ (a tag name optionally followed by
    # attributes) the open context at +level+.
    #
    # Deeper contexts are closed first.  A context already open at the same
    # level is closed if its tag differs; if the tag is the same it is
    # reused, unless +separate_same+ is set, in which case the element is
    # closed and re-opened.
    def push_context(tag_with_arguments, level, separate_same=false)
      match = %r{^(\w+)(.*)$}.match(tag_with_arguments)
      tag = match[1]
      arguments = match[2]
      pop_context_to_level(level)
      if context.level == level
        if context.tag != tag
          pop_context
        elsif separate_same
          write_html("</#{tag}>\n")
          write_html("<#{tag}#{arguments}>\n")
        end
      end
      if context.level < level
        write_html("<#{tag}#{arguments}>\n")
        @context_stack << Context.new(tag, level)
      end
    end

    # Pop topmost Context from the stack, writing its closing tag.
    # Raises RuntimeError if only the top sentinel remains.
    def pop_context
      raise "can't pop at top" if context == TopContext
      cxt = @context_stack.pop
      write_html("</#{cxt.tag}>\n")
    end

    # Close every context that is indented deeper than +level+
    def pop_context_to_level(level)
      pop_context while context.level > level
    end

    # Pop all Contexts from the stack
    def flush_context_stack
      pop_context while context != TopContext
    end

  end

end
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'cgi' # for escapeHTML
4
+
5
class WikiWah

  # Flow deals with block-level formatting in WikiWah.  Input text is split
  # into blocks (paragraphs), separated by blank lines.  A list-item bullet
  # also starts a new block.
  #
  # Flow keeps a stack of open contexts, keyed by indentation level, and
  # emits block-start and block-end tags (e.g. "<ul>", "</ul>") as the
  # indentation and block type change.
  #
  # Flow recognises the following types of blocks:
  #
  # - "= ", "== ", ... is a heading.  The heading level is the number of
  #   "=" characters.
  #
  # - "- " is an unordered list item; "* " is an item of an unordered list
  #   rendered as <ul class="sparse">.
  #
  # - "1. ", "(1) " or "# " is an ordered list item.
  #
  # - "% " is a definition-list term (<dt> inside <dl>).  Indented body
  #   text whose current context is a definition list becomes <dd>.
  #
  # - "| " is preformatted text (e.g. code); its content is HTML-escaped.
  #
  # - "> " is a blockquote (ie. a citation).
  #
  # - Anything else is body text: <p> when unindented, otherwise
  #   <blockquote> (or <dd>, see above).
  #
  class Flow

    # Pattern matching a blank line (spaces only).
    BlankRegexp = /\A *$/

    # Pattern matching a list bullet, which starts a new block even without
    # a preceding blank line.
    BulletRegexp = Regexp.new('\A *([\*\-\#]|\d+\.|\(\d+\)) ')

    # An open block: the tag that was emitted and its indentation level.
    Context = Struct.new('Context', :tag, :level)

    # Sentinel at the bottom of the context stack; it is never popped.
    TopContext = Context.new(:top, -1)

    # Convert +input+ text to HTML and return the HTML string.
    #
    # +input+ may be a String (processed line by line) or any object
    # yielding lines from +each+.
    #
    # An optional +filter+ block may be provided, in which case it is
    # applied to the text body of each block (but not to preformatted text
    # or to the generated tags).
    def self.convert(input, &filter)
      buff = ''.dup  # unfrozen even when string literals are frozen
      Flow.new(buff, filter).process(input)
      buff
    end

    # +out+ receives the generated HTML via <<.  +text_filter+, when given,
    # must respond to +call+ and is applied to block text before output.
    def initialize(out, text_filter=nil)
      @out = out
      @text_filter = text_filter
      @context_stack = [TopContext]
      @block_buffer = nil
    end

    # Process a multi-line input string, then close every open context.
    def process(input)
      add_input(input)
      flush_context_stack
    end

    private

    # Split the input into blocks and hand each one to add_block.
    def add_input(input)
      # String#each no longer exists (removed in Ruby 1.9); iterate lines
      # explicitly, but keep accepting plain collections of lines.
      lines = input.respond_to?(:each_line) ? input.each_line : input.each
      lines.each do |line|
        blank = (line =~ BlankRegexp)
        start_new_block if blank || line =~ BulletRegexp
        append_to_block(line) unless blank
      end
      start_new_block
    end

    # Append a line to the block being accumulated
    def append_to_block(line)
      @block_buffer = (@block_buffer || '') + line
    end

    # Flush the buffered block, if there is one
    def start_new_block
      if @block_buffer
        add_block(@block_buffer)
        @block_buffer = nil
      end
    end

    # Emit the HTML for one block, chosen by the block's leading marker.
    #
    # The match variables ($1, $2, $') set by the +when+ clause stay valid
    # across the calls below: they are local to this method, so regexps
    # used inside push_context do not overwrite them.
    def add_block(block)
      case block
      when /\A( *)- /                          # unordered list item
        push_context('ul', $1.size)
        write_tag($', 'li')
      when /\A( *)\* /                         # "sparse" unordered list item
        push_context('ul class="sparse"', $1.size)
        write_tag($', 'li')
      when /\A( *)(\#|\d+\.|\(\d+\)) /         # ordered list item
        push_context('ol', $1.size)
        write_tag($', 'li')
      when /\A( *)% /                          # definition-list term
        push_context('dl', $1.size)
        write_tag($', 'dt')
      when /\A(( *)> )/                        # citation
        push_context('blockquote', $2.size)
        write_text(strip_prefix($1, block))
      when /\A(( *)\| )/                       # preformatted (explicit)
        push_context('pre', $2.size)
        write_html(CGI.escapeHTML(strip_prefix($1, block)))
      when /\A( *)(=+) /                       # heading
        flush_context_stack
        write_tag($', "h#{$2.size}")
      when /\A( *)/                            # body text
        tag =
          if $1 == ""
            'p'
          elsif context.tag == 'dl'
            'dd'
          else
            'blockquote'
          end
        # Consecutive body blocks at the same level are separate elements.
        push_context(tag, $1.size, true)
        write_text(strip_prefix($1, block))
      end
    end

    # Remove +prefix+ from the start of every line of +text+.  The final
    # space of the prefix may instead be the end of the line, so a line
    # consisting of the marker alone is stripped as well.
    def strip_prefix(prefix, text)
      pattern = '^' + Regexp.quote(prefix)
      pattern.sub!(/\\ $/, '( |$)')
      text.gsub(Regexp.new(pattern), '')
    end

    # Write +content+ wrapped in a balanced +tag+
    def write_tag(content, tag)
      write_html("<#{tag}>\n")
      write_text(content)
      write_html("</#{tag}>\n")
    end

    # Write HTML markup verbatim
    def write_html(html)
      @out << html
    end

    # Write text content, passing it through the text filter if one is set
    def write_text(text)
      text = @text_filter.call(text) if @text_filter
      @out << text
    end

    # Get the current (innermost) Context
    def context
      @context_stack.last
    end

    # Make +tag_with_arguments+ (a tag name optionally followed by
    # attributes) the open context at +level+.
    #
    # Deeper contexts are closed first.  A context already open at the same
    # level is closed if its tag differs; if the tag is the same it is
    # reused, unless +separate_same+ is set, in which case the element is
    # closed and re-opened.
    def push_context(tag_with_arguments, level, separate_same=false)
      match = %r{^(\w+)(.*)$}.match(tag_with_arguments)
      tag = match[1]
      arguments = match[2]
      pop_context_to_level(level)
      if context.level == level
        if context.tag != tag
          pop_context
        elsif separate_same
          write_html("</#{tag}>\n")
          write_html("<#{tag}#{arguments}>\n")
        end
      end
      if context.level < level
        write_html("<#{tag}#{arguments}>\n")
        @context_stack << Context.new(tag, level)
      end
    end

    # Pop topmost Context from the stack, writing its closing tag.
    # Raises RuntimeError if only the top sentinel remains.
    def pop_context
      raise "can't pop at top" if context == TopContext
      cxt = @context_stack.pop
      write_html("</#{cxt.tag}>\n")
    end

    # Close every context that is indented deeper than +level+
    def pop_context_to_level(level)
      pop_context while context.level > level
    end

    # Pop all Contexts from the stack
    def flush_context_stack
      pop_context while context != TopContext
    end

  end

end
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
class WikiWah

  # Subst handles text-transformation using a series of regular-expression
  # substitutions.  It encapsulates a number of "patterns", and associated
  # blocks.  Each block is invoked with a MatchData object when its
  # associated pattern matches, and is expected to return a replacement
  # string.
  #
  # The difference between using Subst and applying a series of gsub's is
  # that replacement values are protected from subsequent transformations:
  # each replacement is parked in a store and represented in the working
  # text by a placeholder until all patterns have been applied.
  class Subst

    def initialize
      @transforms = []
    end

    # Register +regexp+; the block receives the MatchData of each match and
    # returns the replacement.  Patterns are applied in registration order.
    def add_transformation(regexp, &block)
      @transforms << [regexp, block]
    end

    # Return a transformed copy of +s+; +s+ itself is not modified.
    #
    # A handler may call #transform again on part of its match.  Each call
    # therefore tags its placeholders with a prefix unique to that call, so
    # a nested call leaves the outer call's placeholders untouched instead
    # of mistaking them for its own (which used to blank them out).
    def transform(s)
      text = s.dup
      store = []
      prefix = "\001#{store.object_id}\003"
      @transforms.each do |regexp, handler|
        text.gsub!(regexp) do
          store << handler.call(Regexp.last_match)
          "#{prefix}#{store.size - 1}\002"
        end
      end
      placeholder = /#{Regexp.escape(prefix)}(\d+)\002/
      expand(text, store, placeholder, store.size)
    end

    private

    # Replace placeholders in +text+ by their stored values, recursively,
    # because a stored value can itself contain placeholders created
    # earlier in the same call.  Only indexes below +limit+ are expanded;
    # the limit shrinks on every level, which guarantees termination.
    def expand(text, store, placeholder, limit)
      text.gsub(placeholder) do |marker|
        index = Regexp.last_match(1).to_i
        if index < limit
          expand(store[index].to_s, store, placeholder, index)
        else
          marker
        end
      end
    end

  end

end
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
class WikiWah

  # Subst handles text-transformation using a series of regular-expression
  # substitutions.  It encapsulates a number of "patterns", and associated
  # blocks.  Each block is invoked with a MatchData object when its
  # associated pattern matches, and is expected to return a replacement
  # string.
  #
  # The difference between using Subst and applying a series of gsub's is
  # that replacement values are protected from subsequent transformations:
  # each replacement is parked in a store and represented in the working
  # text by a placeholder until all patterns have been applied.
  class Subst

    def initialize
      @transforms = []
    end

    # Register +regexp+; the block receives the MatchData of each match and
    # returns the replacement.  Patterns are applied in registration order.
    def add_transformation(regexp, &block)
      @transforms << [regexp, block]
    end

    # Return a transformed copy of +s+; +s+ itself is not modified.
    #
    # A handler may call #transform again on part of its match.  Each call
    # therefore tags its placeholders with a prefix unique to that call, so
    # a nested call leaves the outer call's placeholders untouched instead
    # of mistaking them for its own (which used to blank them out).
    def transform(s)
      text = s.dup
      store = []
      prefix = "\001#{store.object_id}\003"
      @transforms.each do |regexp, handler|
        text.gsub!(regexp) do
          store << handler.call(Regexp.last_match)
          "#{prefix}#{store.size - 1}\002"
        end
      end
      placeholder = /#{Regexp.escape(prefix)}(\d+)\002/
      expand(text, store, placeholder, store.size)
    end

    private

    # Replace placeholders in +text+ by their stored values, recursively,
    # because a stored value can itself contain placeholders created
    # earlier in the same call.  Only indexes below +limit+ are expanded;
    # the limit shrinks on every level, which guarantees termination.
    def expand(text, store, placeholder, limit)
      text.gsub(placeholder) do |marker|
        index = Regexp.last_match(1).to_i
        if index < limit
          expand(store[index].to_s, store, placeholder, index)
        else
          marker
        end
      end
    end

  end

end
@@ -0,0 +1,3 @@
1
class WikiWah
  # Release number of the wikiwah gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,3 @@
1
# NOTE(review): this class is spelled "Wikiwah", while the formatter class
# is spelled "WikiWah" - confirm which constant is meant to carry VERSION.
class Wikiwah
  # Release number of the wikiwah gem.
  VERSION = '0.0.1'
end
data/lib/wikiwah.rb ADDED
@@ -0,0 +1,103 @@
1
+ require 'wikiwah/flow'
2
+ require 'wikiwah/subst'
3
+ require 'wikiwah/version'
4
+
5
+ # A formatter for turning Wiki-esque text into HTML.
6
+ #
7
+ # = Block-level markup
8
+ #
9
+ # - A line prefixed by "=" is a heading. The heading-level is implied by
10
+ # the number of "=" characters.
11
+ #
12
+ # - A line beginning with "*" or "-" is an unordered list item.
13
+ #
14
+ # - A line beginning with "1.", "(1)" or "#" is an ordered list item.
15
+ #
16
+ # - A paragraph prefixed by "|" is preformatted text (e.g. code)
17
+ #
18
+ # - A paragraph prefixed by ">" is a blockquote (ie. a citation)
19
+ #
20
+ # = Text markup
21
+ #
22
+ # - HTML tags are rendered verbatim.
23
+ #
24
+ # - Text may be marked *bold*, /italic/, _underlined_, +monospace+
25
+ #
26
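+ # - Text may be quoted (HTML-escaped) by wrapping it in doubled double-quotes, e.g. ""<b>""; a backslash quotes the single character that follows it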
27
+ #
28
+ # - URLs turn into links.
29
+ #
30
+ # - "{LOCATION}" creates a link to LOCATION.
31
+ #
32
+ # - "{LABEL}@LOCATION" creates a link to LOCATION, with the specified
33
+ # LABEL.
34
+ #
35
class WikiWah

  # Callable that maps a link target to the value used for the href.  It
  # receives the target text and returns the location; when it returns nil
  # or false the link is left as literal "{label}" text.  The default
  # returns the target unchanged.
  attr_writer :link_translator

  def initialize
    @link_translator = proc { |link| link }
    init_transformer
  end

  # Convert WikiWah text to HTML.  Block structure is handled by Flow; the
  # text of each block is passed through the inline transformer.
  def to_html(text)
    Flow.convert(text) do |paragraph|
      @transformer.transform(paragraph)
    end
  end

  # Convenience form: convert +text+ using a formatter with default settings.
  def self.to_html(text)
    new.to_html(text)
  end

  private

  # Apply the configured link translator to a link target.
  def translate_link(link)
    @link_translator.call(link)
  end

  # Build the inline-markup transformer.  Patterns are registered in
  # priority order: text claimed by an earlier pattern is protected from
  # the later ones.
  def init_transformer
    @transformer = WikiWah::Subst.new
    # Lazy match, like the patterns below: with a greedy match two quoted
    # spans on one line were merged into a single span and the quote marks
    # between them were emitted as text.
    @transformer.add_transformation(/""(.+?)""/) do |match|
      # Double-double-quoted
      CGI.escapeHTML(match[1])
    end
    @transformer.add_transformation(/\\(.)/) do |match|
      # Backslash-quoted
      match[1]
    end
    @transformer.add_transformation(/\<(.+?)\>/m) do |match|
      # In-line HTML
      match[0]
    end
    @transformer.add_transformation(/\{(.+?)\}(@(\S*[\w\/]))?/m) do |match|
      # Distinguished link: "{LABEL}" or "{LABEL}@LOCATION"
      label = @transformer.transform(match[1])
      location = translate_link(match[3] || match[1])
      if location
        "<a href='#{location}'>#{label}</a>"
      else
        "{#{label}}"
      end
    end
    @transformer.add_transformation(/\b[a-z]+:[\w\/]\S*[\w\/]/) do |match|
      # URL
      "<a href='#{match[0]}'>#{match[0]}</a>"
    end
    @transformer.add_transformation(%r{(^|\W)([*+_/])([*+_/]*\w.*?\w[*+_/]*)\2(?!\w)}) do |match|
      # Bold/italic/etc.
      tag = case match[2]
            when '*' then 'strong'
            when '+' then 'tt'
            when '/' then 'em'
            when '_' then 'u'
            end
      # The content may itself carry further markers, e.g. */both/*
      content = @transformer.transform(match[3])
      (match[1] + '<' + tag + '>' + content + '</' + tag + '>')
    end
  end

end
103
+