pseudohikiparser 0.0.0.4.develop

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,359 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudohiki/treestack'
4
+ require 'pseudohiki/inlineparser'
5
+
6
+ module PseudoHiki
7
+ class BlockParser
8
+ URI_RE = /(?:(?:https?|ftp|file):|mailto:)[A-Za-z0-9;\/?:@&=+$,\-_.!~*\'()#%]+/ #borrowed from hikidoc
9
+ ID_TAG_PAT = /^\[([^\[\]]+)\]/o
10
+
11
# Patterns for the markers that are recognised on a raw input line.
module LINE_PAT
  # "<<<" (plus trailing whitespace) at the start of a line opens a verbatim block.
  VERBATIM_BEGIN = /\A(<<<\s*)/
  # ">>>" (plus trailing whitespace) at the start of a line closes a verbatim block.
  VERBATIM_END = /\A(>>>\s*)/
  # Opening and closing braces of an inline plugin.
  PLUGIN_BEGIN = /\{\{/
  PLUGIN_END = /\}\}/
end
17
+
18
+ ParentNode = {}
19
+ HeadToLeaf = {}
20
+
21
+ attr_reader :stack
22
+
23
# Moves a leading "[id]" tag from the first token of +leaf+ onto +node+.
#
# leaf:: an array-like inline node (callers pass the first element of a block
#        leaf); may be nil when that block leaf is empty
# node:: an object responding to +node_id=+
#
# When the first token is a String that matches ID_TAG_PAT, the captured id is
# stored in node.node_id and the tag is removed from the token.
#
# Returns +node+ when the first token is a String (tagged or not), and nil
# when +leaf+ is nil or its first token is not a String.
def self.assign_node_id(leaf, node)
  # An empty block leaf yields nil for self[0]; indexing nil would raise
  # NoMethodError, so there is simply nothing to tag in that case.
  return if leaf.nil?

  head = leaf[0]
  return unless head.kind_of? String

  if (m = ID_TAG_PAT.match(head))
    node.node_id = m[1]
    leaf[0] = head.sub(ID_TAG_PAT, "")
  end
  node
end
34
+
35
# Builds a fresh parser, feeds it +lines+ via #read_lines and returns the
# tree held by the parser's stack.
def self.parse(lines)
  new.tap { |parser| parser.read_lines(lines) }.stack.tree
end
40
+
41
# TreeStack whose #pop first asks the node being left to parse its leafs.
class BlockStack < TreeStack
  def pop
    closing_node = current_node
    closing_node.parse_leafs
    super
  end
end
47
+
48
# Base class of the leafs created from input lines. Each leaf class is
# associated with a head regexp (the line prefix that selects it) and, through
# ParentNode, with the node class that should contain it.
class BlockLeaf < BlockStack::Leaf
  # Head regexps keyed by leaf class; one table shared by all subclasses.
  @@head_re = {}

  attr_accessor :nominal_level, :node_id

  # Registers +head_regex+ as the head pattern of the receiving class.
  def self.head_re=(head_regex)
    @@head_re[self] = head_regex
  end

  # Head pattern registered for the receiving class (nil when none).
  def self.head_re
    @@head_re[self]
  end

  # Whether repeated head marks express a nesting depth; false by default.
  def self.with_depth?
    false
  end

  # Strips the head mark from +line+ (in place) and returns a new leaf filled
  # with the result of inline_parser.parse(line).
  def self.create(line, inline_parser=InlineParser)
    pattern = head_re
    line.sub!(pattern, "") if pattern
    new.concat(inline_parser.parse(line))
  end

  # Builds the head regexp from +head+ and registers it for the receiving
  # class. +head+ is regexp-escaped unless +need_to_escape+ is false;
  # +reg_pat+ is the format string wrapped around it.
  def self.assign_head_re(head, need_to_escape=true, reg_pat="(%s)")
    head = Regexp.escape(head) if need_to_escape
    self.head_re = Regexp.new('\\A' + format(reg_pat, head))
  end

  # Head pattern registered for this leaf's class.
  def head_re
    self.class.head_re
  end

  # Node class registered in ParentNode for this leaf's class.
  def block
    ParentNode[self.class]
  end

  # Pushes a new instance of the parent node class onto +stack+.
  def push_block(stack)
    stack.push(block.new)
  end

  # True when the current node of +stack+ is of this leaf's parent node class
  # and has the same nominal level as the leaf.
  def under_appropriate_block?(stack)
    current = stack.current_node
    current.kind_of?(block) && current.nominal_level == nominal_level
  end

  # Opens a parent node first when the current node is not a suitable one,
  # then pushes the leaf itself.
  def push_self(stack)
    push_block(stack) unless under_appropriate_block?(stack)
    super(stack)
  end

  # Replaces the contents with the inline parse of the joined contents.
  def parse_leafs
    parsed = InlineParser.parse(join)
    clear
    concat(parsed)
  end
end
103
+
104
# Leaf that keeps its line unparsed at creation time and is merged into a
# preceding leaf of the same class instead of being pushed separately.
class NonNestedBlockLeaf < BlockLeaf
  include TreeStack::Mergeable

  # Strips the head mark from +line+ (in place) and wraps the raw line in a
  # new leaf.
  def self.create(line)
    line.sub!(head_re, "") if head_re
    leaf = new
    leaf.push line
    leaf
  end

  # Opens a parent node when needed, then either merges into the last leaf of
  # +stack+ (when it is of the same class) or pushes the leaf normally.
  def push_self(stack)
    push_block(stack) unless under_appropriate_block?(stack)
    previous = stack.last_leaf
    return previous.merge(self) if previous.kind_of?(self.class)

    super(stack)
  end
end
121
+
122
# Leaf whose head mark may be repeated; the length of the matched head is
# recorded as the leaf's nominal level.
class NestedBlockLeaf < BlockLeaf
  # Same as BlockLeaf.assign_head_re but with a repeatable head pattern.
  # +need_to_escape+ defaults to true, as it does in the parent class, so the
  # one-argument call form works on every leaf class.
  def self.assign_head_re(head, need_to_escape=true)
    super(head, need_to_escape, "(%s)+")
  end

  # Creates the leaf through BlockLeaf.create and sets nominal_level to the
  # length of the head matched on +line+. The match is taken before the parent
  # implementation strips the head from +line+.
  def self.create(line)
    m = head_re.match(line)
    super(line).tap { |leaf| leaf.nominal_level = m[0].length }
  end

  def self.with_depth?
    true
  end

  # Pushes the leaf, then lets BlockParser.assign_node_id look for an id tag
  # in the leaf's first element and assign it to the leaf.
  def push_self(stack)
    super(stack)
    BlockParser.assign_node_id(self[0], self)
  end
end

class ListTypeLeaf < NestedBlockLeaf; end
143
+
144
# Base class of the nodes that group block leafs.
class BlockNode < BlockStack::Node
  attr_accessor :base_level, :relative_level_from_base, :node_id

  # Nominal level of the first child, or nil when the node has no first child.
  def nominal_level
    first.nominal_level if first
  end

  # Remembers +stack+ before delegating to the parent implementation.
  def push_self(stack)
    @stack = stack
    super(stack)
  end

  # True unless +breaker+ belongs to this kind of node at the same nominal
  # level.
  def breakable?(breaker)
    return true unless kind_of?(breaker.block)

    nominal_level != breaker.nominal_level
  end

  # Nothing to parse by default; subclasses override this.
  def parse_leafs
  end
end
164
+
165
# Node whose children are each asked to parse themselves.
class NonNestedBlockNode < BlockNode
  def parse_leafs
    each(&:parse_leafs)
  end
end
170
+
171
class NestedBlockNode < BlockNode; end

# Node for list-type blocks.
class ListTypeBlockNode < NestedBlockNode
  # True unless +breaker+'s parent node class is a direct subclass of
  # ListTypeBlockNode and this node's nominal level does not exceed the
  # breaker's.
  def breakable?(breaker)
    return true unless breaker.block.superclass == ListTypeBlockNode

    !(nominal_level <= breaker.nominal_level)
  end
end
178
+
179
# Node that wraps a single list item.
class ListLeafNode < NestedBlockNode
  # True unless +breaker+ is a list-type leaf with a greater nominal level
  # than this node.
  def breakable?(breaker)
    return true unless breaker.kind_of?(ListTypeLeaf)

    !(nominal_level < breaker.nominal_level)
  end
end
184
+
185
# Concrete leaf and node classes, listed pairwise (leaf first, then the node
# class that contains it).
module BlockElement
  class DescLeaf < BlockLeaf; end
  class DescNode < BlockNode; end

  class VerbatimLeaf < BlockLeaf; end
  class VerbatimNode < BlockNode; end

  class QuoteLeaf < NonNestedBlockLeaf; end
  class QuoteNode < NonNestedBlockNode; end

  class TableLeaf < BlockLeaf; end
  class TableNode < BlockNode; end

  class CommentOutLeaf < BlockLeaf; end
  class CommentOutNode < BlockNode; end

  class HeadingLeaf < NestedBlockLeaf; end
  class HeadingNode < NestedBlockNode; end

  class ParagraphLeaf < NonNestedBlockLeaf; end
  class ParagraphNode < NonNestedBlockNode; end

  class HrLeaf < BlockLeaf; end
  class HrNode < BlockNode; end

  # List-type leafs, their block nodes and the per-item wrapper nodes.
  class ListLeaf < ListTypeLeaf; end
  class ListNode < ListTypeBlockNode; end
  class ListWrapNode < ListLeafNode; end

  class EnumLeaf < ListTypeLeaf; end
  class EnumNode < ListTypeBlockNode; end
  class EnumWrapNode < ListLeafNode; end

  # Leaf without a node class of its own in this module.
  class BlockNodeEnd < BlockLeaf; end
end
214
+ include BlockElement
215
+
216
class BlockElement::BlockNodeEnd
  # Deliberately does nothing: this leaf is never added to the stack.
  def push_self(stack)
  end
end
219
+
220
class BlockElement::QuoteNode
  # Replaces the first child with the result of running BlockParser.parse on it.
  def parse_leafs
    quoted = self[0]
    self[0] = BlockParser.parse(quoted)
  end
end
225
+
226
+ # class HeadingNode
227
class BlockElement::HeadingNode
  # Truthy only when +breaker+ belongs to this kind of node and this node's
  # nominal level is not smaller than the breaker's.
  def breakable?(breaker)
    kind_of?(breaker.block) && nominal_level >= breaker.nominal_level
  end
end
232
+
233
class BlockElement::VerbatimLeaf
  # Strips the head mark from +line+ (in place) and stores the raw line in a
  # new leaf, without inline parsing.
  def self.create(line)
    line.sub!(head_re, "") if head_re
    leaf = new
    leaf.push line
    leaf
  end
end
239
+
240
class BlockElement::TableLeaf
  # Same as the inherited create, but parses the line with TableRowParser.
  def self.create(line)
    row_parser = TableRowParser
    super(line, row_parser)
  end
end
245
+
246
class ListTypeLeaf
  include BlockElement

  # Wrapper node class used for each kind of list item.
  Wrapper = {
    ListLeaf => ListWrapNode,
    EnumLeaf => EnumWrapNode
  }

  # Opens the list node when needed, pushes a wrapper node for this item,
  # lets BlockParser.assign_node_id assign an id tag to the stack's current
  # node, and finally adds the leaf itself via push_as_leaf.
  def push_self(stack)
    push_block(stack) unless under_appropriate_block?(stack)
    wrapper_class = Wrapper[self.class]
    stack.push(wrapper_class.new)
    BlockParser.assign_node_id(self[0], stack.current_node)
    stack.push_as_leaf(self)
  end
end
261
+
262
# Register, for every leaf class, the node class that contains it.
{
  DescLeaf => DescNode,
  VerbatimLeaf => VerbatimNode,
  QuoteLeaf => QuoteNode,
  TableLeaf => TableNode,
  CommentOutLeaf => CommentOutNode,
  HeadingLeaf => HeadingNode,
  ParagraphLeaf => ParagraphNode,
  HrLeaf => HrNode,
  ListLeaf => ListNode,
  EnumLeaf => EnumNode
}.each { |leaf, node| ParentNode[leaf] = node }

# BlockNodeEnd is registered as its own parent.
ParentNode[BlockNodeEnd] = BlockNodeEnd
277
+
278
# Registers the head mark of every leaf class in HeadToLeaf, assigns each
# leaf class its head regexp, and returns a regexp that matches any of the
# registered head marks at the start of a line.
def self.assign_head_re
  space = '\s'
  head_pats = [
    [':', DescLeaf],
    [space, VerbatimLeaf],
    ['""', QuoteLeaf],
    ['||', TableLeaf],
    ['//', CommentOutLeaf],
    ['!', HeadingLeaf],
    ['*', ListLeaf],
    ['#', EnumLeaf]
  ].map do |head, leaf|
    HeadToLeaf[head] = leaf
    # '\s' is already a regexp fragment; every other head is literal text.
    escaped_head = head == space ? head : Regexp.escape(head)
    head_pat = "(#{escaped_head})"
    head_pat += "+" if leaf.with_depth?
    leaf.head_re = Regexp.new('\\A' + head_pat)
    head_pat
  end
  # These two leaf types are not part of the combined pattern.
  HrLeaf.head_re = /\A(----)\s*$/
  BlockNodeEnd.head_re = /^(\r?\n?)$/
  Regexp.new('\\A(' + head_pats.join('|') + ')')
end
300
+ HEAD_RE = assign_head_re
301
+
302
# Creates the parser with a stack whose root node can never be broken out of.
def initialize
  root_node = BlockNode.new
  # Override on this instance only: the root reports itself as unbreakable.
  root_node.define_singleton_method(:breakable?) { |_breaker| false }
  @stack = BlockStack.new(root_node)
end
309
+
310
# Asks the stack's current node whether +breaker+ breaks out of it.
def breakable?(breaker)
  current = @stack.current_node
  current.breakable?(breaker)
end
313
+
314
# Returns a copy of +line+ in which every bare URI is wrapped in "[[...]]".
# A URI directly preceded by "[[" or by "|" is left as it is.
def tagfy_link(line)
  line.gsub(URI_RE) do |url|
    preceding = Regexp.last_match.pre_match
    if preceding.end_with?("[[", "|")
      url
    else
      "[[#{url}]]"
    end
  end
end
323
+
324
# Chooses the leaf class for +line+.
#
# BlockNodeEnd and HrLeaf are tried first through their own head regexps.
# Otherwise, when HEAD_RE matches, the class is looked up in HeadToLeaf by the
# whole matched head, then by the first character of the line, then under the
# '\s' key. A line that HEAD_RE does not match is a ParagraphLeaf.
def select_leaf_type(line)
  special = [BlockNodeEnd, HrLeaf].find { |leaf| leaf.head_re =~ line }
  return special if special

  matched = HEAD_RE.match(line)
  return ParagraphLeaf unless matched

  HeadToLeaf[matched[0]] || HeadToLeaf[line[0, 1]] || HeadToLeaf['\s']
end
330
+
331
# Consumes +lines+ up to the closing verbatim marker (or the end of the
# input), pushing each consumed line onto the stack as a VerbatimLeaf, and
# finally drops the closing marker line when there is one.
def add_verbatim_block(lines)
  closing = LINE_PAT::VERBATIM_END
  while !lines.empty? && closing !~ lines.first
    current = lines.shift
    # Lines matching BlockNodeEnd's head pattern get a space prepended before
    # being handed to VerbatimLeaf.create.
    current = " " + current if BlockNodeEnd.head_re =~ current
    @stack.push(VerbatimLeaf.create(current))
  end
  lines.shift if closing =~ lines.first
end
338
+
339
# Creates the leaf matching +line+, pops the stack for as long as the new leaf
# breaks out of the current node, then pushes the leaf.
def add_leaf(line)
  leaf_class = select_leaf_type(line)
  leaf = leaf_class.create(line)
  @stack.pop while breakable?(leaf)
  @stack.push(leaf)
end
346
+
347
# Consumes +lines+ (the array is emptied by shifting) and adds every line to
# the stack, then pops the stack once.
#
# A line opening a verbatim block hands the following lines over to
# #add_verbatim_block. Any other line has its bare URIs tagged by #tagfy_link,
# unless it matches VerbatimLeaf's head pattern, and is added with #add_leaf.
def read_lines(lines)
  while (line = lines.shift)
    if LINE_PAT::VERBATIM_BEGIN =~ line
      add_verbatim_block(lines)
      next
    end

    line = tagfy_link(line) unless VerbatimLeaf.head_re =~ line
    add_leaf(line)
  end
  @stack.pop
end
358
+ end
359
+ end
@@ -0,0 +1,229 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudohiki/inlineparser'
4
+ require 'pseudohiki/blockparser'
5
+
6
+ module PseudoHiki
7
+ class HtmlFormat
8
+ include InlineParser::InlineElement
9
+ include BlockParser::BlockElement
10
+ include TableRowParser::InlineElement
11
+
12
+ #for InlineParser
13
+ LINK, IMG, EM, STRONG, DEL = %w(a img em strong del)
14
+ HREF, SRC, ALT = %w(href src alt)
15
+ PLAIN, PLUGIN = %w(plain span)
16
+ #for BlockParser
17
+ DESC, VERB, QUOTE, TABLE, PARA, HR, UL, OL = %w(dl pre blockquote table p hr ul ol)
18
+ SECTION = "section"
19
+ DT, DD, TR, HEADING, LI = %w(dt dd tr h li)
20
+ DescSep = [InlineParser::DescSep]
21
+
22
+ Formatter = {}
23
+
24
+ attr_reader :element_name
25
+ attr_writer :generator, :formatter
26
+
27
# Replaces every formatter in +new_formatter+ with a copy of itself that uses
# +generator+ and that looks other formatters up in +new_formatter+.
# Returns +new_formatter+.
def self.setup_new_formatter(new_formatter, generator)
  new_formatter.each_key do |node_class|
    copy = new_formatter[node_class].dup
    copy.generator = generator
    copy.formatter = new_formatter
    new_formatter[node_class] = copy
  end
end
34
+
35
# Formatter registered for PlainNode in the receiving class's Formatter table.
def self.get_plain
  table = self::Formatter
  table[PlainNode]
end
38
+
39
# Formats +tree+ by having it accept the plain-node formatter.
def self.format(tree)
  tree.accept(get_plain)
end
43
+
44
# element_name:: name handed to the generator when an element is created
# generator::    object used to create elements (HtmlElement by default)
#
# The formatter lookup table starts out as the Formatter constant.
def initialize(element_name, generator=HtmlElement)
  @formatter = Formatter
  @generator = generator
  @element_name = element_name
end
49
+
50
# Has +element+ accept the formatter registered for its class, falling back to
# the PlainNode formatter, and returns the result.
def visited_result(element)
  element.accept(@formatter[element.class] || @formatter[PlainNode])
end
54
+
55
# Pushes the formatted form of every token of +tree+ onto +element+.
def push_visited_results(element, tree)
  tree.each do |token|
    formatted = visited_result(token)
    element.push(formatted)
  end
end
58
+
59
# Creates this formatter's element for +tree+, fills it with the formatted
# tokens of +tree+ and returns it.
def visit(tree)
  create_self_element(tree).tap do |htmlelement|
    push_visited_results(htmlelement, tree)
  end
end
64
+
65
# Asks the generator for a new element named after this formatter.
# +tree+ is not used here; subclasses that override this method read it.
def create_self_element(tree=nil)
  name = @element_name
  @generator.create(name)
end
68
+
69
# Splits +tree+ at every occurrence of +separator+ and returns the array of
# chunks. +tree+ itself is consumed: the chunks are shifted off it, and the
# remainder that follows the last separator is +tree+ itself.
def split_into_parts(tree, separator)
  chunks = []
  until (sep_index = tree.index(separator)).nil?
    chunks << tree.shift(sep_index)
    tree.shift # drop the separator itself
  end
  chunks << tree
end
77
+
78
+ #for InlineParser
79
+
80
# Formats a link node either as an image element (when the last token matches
# ImageSuffix) or as an anchor element.
class LinkNodeFormatter < self
  # tree:: the link node; it is duplicated first, so the caller's node is not
  #        modified.
  #
  # Returns the generated element. When the node has no URI part, a warning is
  # written to STDERR and an anchor with an empty reference is generated.
  # NoMethodError propagates when the last token does not respond to +join+.
  def visit(tree)
    tree = tree.dup
    # get_caption removes the caption and the separator from +tree+, so from
    # here on +tree+ holds only the URI part.
    caption = get_caption(tree)
    if tree.empty?
      # Checked explicitly rather than by rescuing NoMethodError, so that a
      # genuine NoMethodError keeps its original message and backtrace.
      STDERR.puts "No uri is specified for #{caption}"
      ref = nil
    else
      ref = tree.last.join
    end

    if ImageSuffix =~ ref
      # Built with this formatter's own generator so that formatter copies
      # made by setup_new_formatter emit the image with their generator too,
      # as every other element created in this file already is.
      htmlelement = @generator.create(IMG)
      htmlelement[SRC] = tree.join
      htmlelement[ALT] = caption.join if caption
    else
      htmlelement = create_self_element
      htmlelement[HREF] = tree.join
      htmlelement.push caption||tree.join
    end
    htmlelement
  end

  # Splits +tree+ at the link separator. Returns nil when there is no
  # separator; otherwise returns the formatted tokens of the part before it.
  # +tree+ is consumed up to and including the separator.
  def get_caption(tree)
    first_part, second_part = split_into_parts(tree, [LinkSep])
    return nil unless second_part

    first_part.map { |token| visited_result(token) }
  end
end
108
+
109
# Formats an inline leaf as the escaped form of its first element.
class InlineLeafFormatter < self
  def visit(leaf)
    raw = leaf.first
    @generator.escape(raw)
  end
end
114
+
115
# Formatter whose own "element" is a new instance of the generator's Children
# class rather than a named element.
class PlainNodeFormatter < self
  def create_self_element(tree=nil)
    container_class = @generator::Children
    container_class.new
  end
end
120
+
121
+ #for BlockParser
122
+
123
# Formats a verbatim node: the joined contents are escaped and every URI
# found in the escaped text is replaced by an anchor element's string form.
class VerbatimNodeFormatter < self
  def visit(tree)
    element = create_self_element
    escaped = @generator.escape(tree.join)
    contents = escaped.gsub(BlockParser::URI_RE) do |url|
      @generator.create("a", url, "href" => url).to_s
    end
    element.push contents
    element
  end
end
133
+
134
# Commented-out nodes produce no output.
class CommentOutNodeFormatter < self
  def visit(tree)
    ""
  end
end
137
+
138
# Creates the element for a heading node and tags it with a class named
# "h<level>", where the level is the nominal level of the node's first child.
class HeadingNodeFormatter < self
  def create_self_element(tree)
    element = super(tree)
    heading_level = "h#{tree.first.nominal_level}"
    # Set the class when there is none; otherwise append to the existing one.
    element['class'] ||= heading_level
    element['class'] += " " + heading_level unless element['class'] == heading_level
    element
  end
end
147
+
148
# Formats a description leaf as a term element, followed by a description
# element when the leaf has a non-empty part after the separator.
class DescLeafFormatter < self
  def visit(tree)
    tree = tree.dup # split_into_parts consumes its argument
    element = @generator::Children.new
    dt_part, dd_part = split_into_parts(tree, DescSep)
    element.push super(dt_part)
    if dd_part && !dd_part.empty?
      dd = @generator.create(DD)
      push_visited_results(dd, dd_part)
      element.push dd
    end
    element
  end
end
163
+
164
# Formats a table cell. The element name is taken from the cell's cell_type,
# and rowspan/colspan are set only when they are greater than 1.
class TableCellNodeFormatter < self
  def visit(tree)
    @element_name = tree.cell_type
    element = super(tree)
    element["rowspan"] = tree.rowspan if tree.rowspan > 1
    element["colspan"] = tree.colspan if tree.colspan > 1
    element
  end
end
173
+
174
# Creates a heading element named after the element name followed by the
# leaf's nominal level; the upcased node id, when present, becomes its "id".
class HeadingLeafFormatter < self
  def create_self_element(tree)
    element = @generator.create(@element_name + tree.nominal_level.to_s)
    element["id"] = tree.node_id.upcase if tree.node_id
    element
  end
end
181
+
182
# Creates the element of a list item; the upcased node id, when present,
# becomes its "id".
class ListLeafNodeFormatter < self
  def create_self_element(tree)
    element = super(tree)
    element["id"] = tree.node_id.upcase if tree.node_id
    element
  end
end
189
+
190
# Node classes that only need a plain formatter carrying an element name.
{
  # for InlineParser
  EmNode => EM,
  StrongNode => STRONG,
  DelNode => DEL,
  PluginNode => PLUGIN,
  # for BlockParser
  DescNode => DESC,
  QuoteNode => QUOTE,
  TableNode => TABLE,
  ParagraphNode => PARA,
  HrNode => HR,
  ListNode => UL,
  EnumNode => OL,
  TableLeaf => TR
}.each { |node_class, element| Formatter[node_class] = new(element) }
203
+
204
+ #for InlineParser
205
ImgFormat = new(IMG)

# Node classes that are handled by a specialised formatter.
{
  # for InlineParser
  LinkNode => LinkNodeFormatter.new(LINK),
  InlineLeaf => InlineLeafFormatter.new(nil),
  PlainNode => PlainNodeFormatter.new(PLAIN),
  # for BlockParser
  VerbatimNode => VerbatimNodeFormatter.new(VERB),
  CommentOutNode => CommentOutNodeFormatter.new(nil),
  HeadingNode => HeadingNodeFormatter.new(SECTION),
  DescLeaf => DescLeafFormatter.new(DT),
  TableCellNode => TableCellNodeFormatter.new(nil),
  HeadingLeaf => HeadingLeafFormatter.new(HEADING),
  ListWrapNode => ListLeafNodeFormatter.new(LI),
  EnumWrapNode => ListLeafNodeFormatter.new(LI)
}.each { |node_class, formatter| Formatter[node_class] = formatter }
218
+ end
219
+
220
# HtmlFormat variant whose formatters are copies bound to XhtmlElement.
class XhtmlFormat < HtmlFormat
  Formatter = HtmlFormat::Formatter.dup.tap do |table|
    setup_new_formatter(table, XhtmlElement)
  end
end
224
+
225
# XhtmlFormat variant whose formatters are copies of HtmlFormat's formatters
# bound to Xhtml5Element.
class Xhtml5Format < XhtmlFormat
  Formatter = HtmlFormat::Formatter.dup.tap do |table|
    setup_new_formatter(table, Xhtml5Element)
  end
end
229
+ end