pseudohikiparser 0.0.0.4.develop

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,359 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudohiki/treestack'
4
+ require 'pseudohiki/inlineparser'
5
+
6
+ module PseudoHiki
7
+ class BlockParser
8
+ URI_RE = /(?:(?:https?|ftp|file):|mailto:)[A-Za-z0-9;\/?:@&=+$,\-_.!~*\'()#%]+/ #borrowed from hikidoc
9
+ ID_TAG_PAT = /^\[([^\[\]]+)\]/o
10
+
11
+ module LINE_PAT
12
+ VERBATIM_BEGIN = /\A(<<<\s*)/o
13
+ VERBATIM_END = /\A(>>>\s*)/o
14
+ PLUGIN_BEGIN = /\{\{/o
15
+ PLUGIN_END = /\}\}/o
16
+ end
17
+
18
+ ParentNode = {}
19
+ HeadToLeaf = {}
20
+
21
+ attr_reader :stack
22
+
23
# Moves a leading "[id]" tag from the first token of +leaf+ onto +node+.
#
# leaf:: array-like inline node; only its first element is inspected.
#        +nil+ is accepted (an empty block leaf has no first inline node).
# node:: object responding to +node_id=+.
#
# When the first token is a String matching ID_TAG_PAT, the captured text is
# stored in node.node_id and the tag is removed from the token (leaf[0] is
# replaced by a new String).
#
# Returns +node+ when the first token is a String, otherwise +nil+.
def self.assign_node_id(leaf, node)
  # Structure is block_leaf:[inline_node:[token or inline_node]]; a missing
  # inline node simply means there is nothing to tag.
  return unless leaf
  head = leaf[0]
  return unless head.kind_of? String
  m = ID_TAG_PAT.match(head)
  if m
    node.node_id = m[1]
    # Same result as head.sub(ID_TAG_PAT, "") without matching a second time.
    leaf[0] = m.pre_match + m.post_match
  end
  node
end
34
+
35
# Builds a parser, feeds it +lines+ through #read_lines and returns the tree
# held by the parser's stack.
def self.parse(lines)
  new.tap { |parser| parser.read_lines(lines) }.stack.tree
end
40
+
41
# TreeStack variant whose #pop asks the node being closed to parse its leafs
# before it is removed from the stack.
class BlockStack < TreeStack
  def pop
    current_node.parse_leafs
    super
  end
end
47
+
48
# Base class for block-level leafs (one source line each).
#
# Every leaf class may own a "head" regexp that recognises the markup at the
# start of a line; .create strips it before the rest is parsed. The regexp is
# stored per class, so a subclass does not see the pattern of its parent.
class BlockLeaf < BlockStack::Leaf
  attr_accessor :nominal_level
  attr_accessor :node_id

  class << self
    # Head regexp of this exact class (nil when none was assigned). Kept in a
    # class-level instance variable rather than a @@class variable, which
    # would be shared by the whole inheritance tree.
    attr_accessor :head_re
  end

  # Whether the head markup may be repeated to express nesting depth.
  def self.with_depth?
    false
  end

  # Strips the head markup from +line+ (in place) and returns a new leaf
  # filled with the result of inline_parser.parse(line).
  def self.create(line, inline_parser=InlineParser)
    line.sub!(head_re, "") if head_re
    leaf = new
    leaf.concat(inline_parser.parse(line))
  end

  # Compiles and stores the head regexp for this class.
  #
  # head::           head markup, or a regexp source when need_to_escape is false
  # need_to_escape:: escape +head+ with Regexp.escape first
  # reg_pat::        format string wrapping the head; anchored with \A
  def self.assign_head_re(head, need_to_escape=true, reg_pat="(%s)")
    head = Regexp.escape(head) if need_to_escape
    self.head_re = Regexp.new('\\A' + reg_pat % [head])
  end

  # Head regexp registered for this leaf's class.
  def head_re
    self.class.head_re
  end

  # Node class registered in ParentNode for this leaf's class.
  def block
    ParentNode[self.class]
  end

  # Opens a new parent block for this leaf on +stack+.
  def push_block(stack)
    stack.push(block.new)
  end

  # True when the stack's current node is of this leaf's block type and has
  # the same nominal level.
  def under_appropriate_block?(stack)
    current = stack.current_node
    current.kind_of?(block) && current.nominal_level == nominal_level
  end

  # Pushes this leaf, opening its parent block first when necessary.
  def push_self(stack)
    push_block(stack) unless under_appropriate_block?(stack)
    super(stack)
  end

  # Replaces the leaf's contents with the inline parse of its joined text.
  def parse_leafs
    parsed = InlineParser.parse(join)
    clear
    concat(parsed)
  end
end
103
+
104
# Leaf whose consecutive occurrences are merged into a single leaf and which
# keeps its line unparsed until parse_leafs is called.
class NonNestedBlockLeaf < BlockLeaf
  include TreeStack::Mergeable

  # Strips the head markup from +line+ (in place) and wraps the raw line in a
  # new leaf without running the inline parser.
  def self.create(line)
    line.sub!(head_re, "") if head_re
    leaf = new
    leaf.push line
    leaf
  end

  # Merges into the stack's last leaf when that leaf is of the same class;
  # otherwise pushes itself like any other block leaf.
  def push_self(stack)
    push_block(stack) unless under_appropriate_block?(stack)
    previous = stack.last_leaf
    return previous.merge(self) if previous.kind_of?(self.class)
    super(stack)
  end
end
121
+
122
# Leaf whose head markup can be repeated; the length of the matched head is
# recorded as the leaf's nominal level.
class NestedBlockLeaf < BlockLeaf
  # Same as BlockLeaf.assign_head_re, but allows the head to repeat.
  def self.assign_head_re(head, need_to_escape)
    super(head, need_to_escape, "(%s)+")
  end

  # Creates the leaf and sets nominal_level to the length of the matched head.
  def self.create(line)
    # Match before calling super: super strips the head from +line+ in place.
    head_match = head_re.match(line)
    leaf = super(line)
    leaf.nominal_level = head_match[0].length
    leaf
  end

  def self.with_depth?
    true
  end

  # Pushes the leaf, then lets BlockParser pick up an id tag from the first
  # inline node.
  def push_self(stack)
    super(stack)
    BlockParser.assign_node_id(first, self)
  end
end
141
+
142
+ class ListTypeLeaf < NestedBlockLeaf; end
143
+
144
# Base class for block-level nodes (containers of block leafs).
class BlockNode < BlockStack::Node
  attr_accessor :base_level, :relative_level_from_base
  attr_accessor :node_id

  # Nominal level of the first child, or nil when there is no first child.
  def nominal_level
    head = first
    head ? head.nominal_level : nil
  end

  # Remembers the stack it is pushed onto, then pushes itself.
  def push_self(stack)
    @stack = stack
    super(stack)
  end

  # True when +breaker+ (an incoming leaf) should close this node, i.e. unless
  # this node is of the breaker's block type with the same nominal level.
  def breakable?(breaker)
    return true unless kind_of?(breaker.block)
    nominal_level != breaker.nominal_level
  end

  # Plain block nodes have nothing to convert when they are popped.
  def parse_leafs
  end
end
164
+
165
# Block node that delegates parse_leafs to each of its children.
class NonNestedBlockNode < BlockNode
  def parse_leafs
    each(&:parse_leafs)
  end
end
170
+
171
+ class NestedBlockNode < BlockNode; end
172
+
173
# Node for list-type blocks. It stays open only for a breaker whose block
# class directly subclasses ListTypeBlockNode and whose nominal level is not
# lower than this node's.
class ListTypeBlockNode < NestedBlockNode
  def breakable?(breaker)
    return true unless breaker.block.superclass == ListTypeBlockNode
    !(nominal_level <= breaker.nominal_level)
  end
end
178
+
179
# Wrapper node around a single list item. It stays open only for a
# ListTypeLeaf breaker with a strictly greater nominal level.
class ListLeafNode < NestedBlockNode
  def breakable?(breaker)
    return true unless breaker.kind_of?(ListTypeLeaf)
    !(nominal_level < breaker.nominal_level)
  end
end
184
+
185
+ module BlockElement
186
+ class DescLeaf < BlockLeaf; end
187
+ class VerbatimLeaf < BlockLeaf; end
188
+ class QuoteLeaf < NonNestedBlockLeaf; end
189
+ class TableLeaf < BlockLeaf; end
190
+ class CommentOutLeaf < BlockLeaf; end
191
+ class HeadingLeaf < NestedBlockLeaf; end
192
+ class ParagraphLeaf < NonNestedBlockLeaf; end
193
+ class HrLeaf < BlockLeaf; end
194
+ class BlockNodeEnd < BlockLeaf; end
195
+
196
+ class ListLeaf < ListTypeLeaf; end
197
+ class EnumLeaf < ListTypeLeaf; end
198
+
199
+ class DescNode < BlockNode; end
200
+ class VerbatimNode < BlockNode; end
201
+ class QuoteNode < NonNestedBlockNode; end
202
+ class TableNode < BlockNode; end
203
+ class CommentOutNode < BlockNode; end
204
+ class HeadingNode < NestedBlockNode; end
205
+ class ParagraphNode < NonNestedBlockNode; end
206
+ class HrNode < BlockNode; end
207
+
208
+ class ListNode < ListTypeBlockNode; end
209
+ class EnumNode < ListTypeBlockNode; end
210
+
211
+ class ListWrapNode < ListLeafNode; end
212
+ class EnumWrapNode < ListLeafNode; end
213
+ end
214
+ include BlockElement
215
+
216
# A block terminator is never stored in the tree: pushing it does nothing.
class BlockElement::BlockNodeEnd
  def push_self(stack)
    nil
  end
end
219
+
220
# The first child of a quote node is run through BlockParser.parse and
# replaced by the resulting tree.
class BlockElement::QuoteNode
  def parse_leafs
    quoted = self[0]
    self[0] = BlockParser.parse(quoted)
  end
end
225
+
226
+ # class HeadingNode
227
# A heading node is closed only by a breaker of its own block type whose
# nominal level is not greater than this node's.
class BlockElement::HeadingNode
  def breakable?(breaker)
    return false unless kind_of?(breaker.block)
    nominal_level >= breaker.nominal_level
  end
end
232
+
233
# Verbatim lines are stored as-is: the head markup is stripped from +line+ in
# place and the rest is wrapped in a new leaf without inline parsing.
class BlockElement::VerbatimLeaf
  def self.create(line)
    line.sub!(head_re, "") if head_re
    leaf = new
    leaf.push line
    leaf
  end
end
239
+
240
+ class BlockElement::TableLeaf
241
+ def self.create(line)
242
+ super(line, TableRowParser)
243
+ end
244
+ end
245
+
246
# List items are wrapped in an intermediate node before they are stored.
class ListTypeLeaf
  include BlockElement

  # Wrapper node class for each list leaf class.
  Wrapper = {
    ListLeaf => ListWrapNode,
    EnumLeaf => EnumWrapNode
  }

  # Opens the parent list block when needed, pushes a wrapper node, lets
  # BlockParser move an id tag onto the stack's current node, then stores
  # this leaf via push_as_leaf.
  def push_self(stack)
    push_block(stack) unless under_appropriate_block?(stack)
    wrapper = Wrapper[self.class].new
    stack.push wrapper
    BlockParser.assign_node_id(self[0], stack.current_node)
    stack.push_as_leaf self
  end
end
261
+
262
# Register the node class that contains each kind of leaf.
ParentNode.update({
  DescLeaf => DescNode,
  VerbatimLeaf => VerbatimNode,
  QuoteLeaf => QuoteNode,
  TableLeaf => TableNode,
  CommentOutLeaf => CommentOutNode,
  HeadingLeaf => HeadingNode,
  ParagraphLeaf => ParagraphNode,
  HrLeaf => HrNode,
  ListLeaf => ListNode,
  EnumLeaf => EnumNode
})
275
+
276
+ ParentNode[BlockNodeEnd] = BlockNodeEnd
277
+
278
# Registers the head markup of every leaf class.
#
# Fills HeadToLeaf (head string => leaf class), assigns each leaf class its
# \A-anchored head regexp (repeatable for classes answering with_depth?),
# sets the dedicated patterns of HrLeaf and BlockNodeEnd, and returns one
# regexp that matches any of the registered heads at the start of a line.
def self.assign_head_re
  space = '\s'
  heads = [
    [':', DescLeaf],
    [space, VerbatimLeaf],
    ['""', QuoteLeaf],
    ['||', TableLeaf],
    ['//', CommentOutLeaf],
    ['!', HeadingLeaf],
    ['*', ListLeaf],
    ['#', EnumLeaf]
  ]
  head_pats = heads.map do |head, leaf|
    HeadToLeaf[head] = leaf
    # '\s' is already a regexp source and must not be escaped.
    escaped_head = head == space ? head : Regexp.escape(head)
    pattern = "(#{escaped_head})"
    pattern += "+" if leaf.with_depth?
    leaf.head_re = Regexp.new('\\A' + pattern)
    pattern
  end
  HrLeaf.head_re = Regexp.new(/\A(----)\s*$/o)
  BlockNodeEnd.head_re = Regexp.new(/^(\r?\n?)$/o)
  Regexp.new('\\A(' + head_pats.join('|') + ')')
end
300
+ HEAD_RE = assign_head_re
301
+
302
# Sets up the parser's stack with a root node that no leaf can close.
def initialize
  root_node = BlockNode.new
  root_node.define_singleton_method(:breakable?) { |_breaker| false }
  @stack = BlockStack.new(root_node)
end
309
+
310
+ def breakable?(breaker)
311
+ @stack.current_node.breakable?(breaker)
312
+ end
313
+
314
# Wraps every bare URI in +line+ in "[[...]]" link markup.
#
# A URI is left untouched when the text right before it ends with "[[" or
# "|", i.e. when it already sits inside link markup.
#
# Returns a new String; +line+ itself is not modified.
def tagfy_link(line)
  line.gsub(URI_RE) do |url|
    preceding = Regexp.last_match.pre_match
    if preceding.end_with?("[[", "|")
      url
    else
      "[[#{url}]]"
    end
  end
end
323
+
324
# Chooses the leaf class for +line+.
#
# BlockNodeEnd and HrLeaf are tried first through their own head regexps.
# Otherwise the head matched by HEAD_RE is looked up in HeadToLeaf: first the
# whole match, then the first character of the line, then the '\s' entry.
# Lines without a known head become ParagraphLeaf.
def select_leaf_type(line)
  special = [BlockNodeEnd, HrLeaf].find { |leaf| leaf.head_re =~ line }
  return special if special
  head = HEAD_RE.match(line)
  return ParagraphLeaf unless head
  HeadToLeaf[head[0]] || HeadToLeaf[line[0, 1]] || HeadToLeaf['\s']
end
330
+
331
# Consumes the body of a "<<<" ... ">>>" verbatim block from +lines+.
#
# Lines are shifted off +lines+ and pushed as VerbatimLeaf objects until the
# closing line (LINE_PAT::VERBATIM_END) or the end of input; the closing line
# itself is discarded.
#
# VerbatimLeaf.create strips whatever VerbatimLeaf.head_re matches (a single
# leading whitespace character) because that is the markup of one-line
# verbatim text. Inside a block the line must be kept as written, so a
# sacrificial space is prepended to every line the head regexp would touch.
# This covers blank lines as well as lines indented with spaces or tabs.
def add_verbatim_block(lines)
  until lines.empty? || LINE_PAT::VERBATIM_END =~ lines.first
    lines[0] = " " + lines[0] if VerbatimLeaf.head_re =~ lines.first
    @stack.push(VerbatimLeaf.create(lines.shift))
  end
  lines.shift if LINE_PAT::VERBATIM_END =~ lines.first
end
338
+
339
# Turns +line+ into a leaf, closes every open node the leaf breaks, then
# pushes the leaf onto the stack.
def add_leaf(line)
  leaf = select_leaf_type(line).create(line)
  @stack.pop while breakable?(leaf)
  @stack.push leaf
end
346
+
347
# Parses +lines+ one by one, consuming the array as it goes (lines are
# shifted off the front).
#
# A line opening a verbatim block hands the following lines over to
# add_verbatim_block. Any other line is added as a leaf; bare URIs are tagged
# as links first unless the line starts with the verbatim head. Finally the
# stack is popped once.
def read_lines(lines)
  while (line = lines.shift)
    if LINE_PAT::VERBATIM_BEGIN =~ line
      add_verbatim_block(lines)
      next
    end
    line = tagfy_link(line) unless VerbatimLeaf.head_re =~ line
    add_leaf(line)
  end
  @stack.pop
end
358
+ end
359
+ end
@@ -0,0 +1,229 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudohiki/inlineparser'
4
+ require 'pseudohiki/blockparser'
5
+
6
+ module PseudoHiki
7
+ class HtmlFormat
8
+ include InlineParser::InlineElement
9
+ include BlockParser::BlockElement
10
+ include TableRowParser::InlineElement
11
+
12
+ #for InlineParser
13
+ LINK, IMG, EM, STRONG, DEL = %w(a img em strong del)
14
+ HREF, SRC, ALT = %w(href src alt)
15
+ PLAIN, PLUGIN = %w(plain span)
16
+ #for BlockParser
17
+ DESC, VERB, QUOTE, TABLE, PARA, HR, UL, OL = %w(dl pre blockquote table p hr ul ol)
18
+ SECTION = "section"
19
+ DT, DD, TR, HEADING, LI = %w(dt dd tr h li)
20
+ DescSep = [InlineParser::DescSep]
21
+
22
+ Formatter = {}
23
+
24
+ attr_reader :element_name
25
+ attr_writer :generator, :formatter
26
+
27
# Re-targets every entry of the formatter table +new_formatter+ in place.
#
# Each formatter is replaced by a copy whose generator is +generator+ and
# whose formatter table is +new_formatter+ itself; the formatters originally
# stored in the table are left untouched.
def self.setup_new_formatter(new_formatter, generator)
  new_formatter.each_key do |node_class|
    copy = new_formatter[node_class].dup
    copy.generator = generator
    copy.formatter = new_formatter
    new_formatter[node_class] = copy
  end
end
34
+
35
+ def self.get_plain
36
+ self::Formatter[PlainNode]
37
+ end
38
+
39
# Formats +tree+ by letting it accept the formatter returned by get_plain.
def self.format(tree)
  tree.accept(get_plain)
end
43
+
44
# element_name:: name passed to the generator when an element is created
# generator::    element factory; defaults to HtmlElement
#
# The formatter table defaults to the Formatter constant.
def initialize(element_name, generator=HtmlElement)
  @element_name, @generator, @formatter = element_name, generator, Formatter
end
49
+
50
# Lets +element+ accept the formatter registered for its class, falling back
# to the PlainNode formatter, and returns the result.
def visited_result(element)
  element.accept(@formatter[element.class] || @formatter[PlainNode])
end
54
+
55
# Formats every token of +tree+ and pushes each result onto +element+.
def push_visited_results(element, tree)
  tree.each do |token|
    rendered = visited_result(token)
    element.push rendered
  end
end
58
+
59
# Creates this formatter's element for +tree+, fills it with the formatted
# children of +tree+ and returns it.
def visit(tree)
  create_self_element(tree).tap do |htmlelement|
    push_visited_results(htmlelement, tree)
  end
end
64
+
65
+ def create_self_element(tree=nil)
66
+ @generator.create(@element_name)
67
+ end
68
+
69
# Splits +tree+ at every element equal to +separator+.
#
# +tree+ is consumed in the process: each chunk before a separator is shifted
# off, the separator is dropped, and what remains of +tree+ (the same object)
# becomes the last chunk. Returns the array of chunks.
def split_into_parts(tree, separator)
  chunks = []
  loop do
    sep_index = tree.index(separator)
    break unless sep_index
    chunks << tree.shift(sep_index)
    tree.shift
  end
  chunks << tree
end
77
+
78
+ #for InlineParser
79
+
80
+ class LinkNodeFormatter < self
81
# Formats a link node as an image or an anchor element.
#
# The node is split into an optional caption and the reference part. When the
# reference matches ImageSuffix an image element is produced (SRC, plus ALT
# when a caption exists); otherwise an anchor with HREF whose content is the
# caption, or the reference itself when there is no caption.
#
# A node without a reference part is reported on STDERR and formatted with an
# empty reference. A NoMethodError raised while reading a non-empty reference
# part propagates unchanged, keeping its original message and backtrace.
def visit(tree)
  tree = tree.dup # get_caption consumes the caption part of its argument
  caption = get_caption(tree)
  ref = nil
  if tree.empty?
    STDERR.puts "No uri is specified for #{caption}"
  else
    ref = tree.last.join
  end
  if ImageSuffix =~ ref
    # Built with this formatter's own generator so that formatter tables
    # re-targeted by setup_new_formatter emit images through the same
    # generator as every other element.
    htmlelement = @generator.create(IMG)
    htmlelement[SRC] = tree.join
    htmlelement[ALT] = caption.join if caption
  else
    htmlelement = create_self_element
    htmlelement[HREF] = tree.join
    htmlelement.push caption||tree.join
  end
  htmlelement
end
101
+
102
# Splits +tree+ at the link separator and returns the formatted tokens of the
# part before it, or nil when there is no separator. When a separator is
# found, +tree+ is left holding only the part after it.
def get_caption(tree)
  caption_part, reference_part = split_into_parts(tree, [LinkSep])
  reference_part ? caption_part.map { |token| visited_result(token) } : nil
end
107
+ end
108
+
109
+ class InlineLeafFormatter < self
110
+ def visit(leaf)
111
+ @generator.escape(leaf.first)
112
+ end
113
+ end
114
+
115
+ class PlainNodeFormatter < self
116
+ def create_self_element(tree=nil)
117
+ @generator::Children.new
118
+ end
119
+ end
120
+
121
+ #for BlockParser
122
+
123
+ class VerbatimNodeFormatter < self
124
# Formats a verbatim node: the joined text is escaped by the generator, every
# URI found in the escaped text is replaced by the string form of an "a"
# element pointing at it, and the result is pushed into this formatter's
# element.
def visit(tree)
  element = create_self_element
  escaped = @generator.escape(tree.join)
  linked = escaped.gsub(BlockParser::URI_RE) do |url|
    @generator.create("a", url, "href" => url).to_s
  end
  element.push linked
  element
end
132
+ end
133
+
134
+ class CommentOutNodeFormatter < self
135
+ def visit(tree); ""; end
136
+ end
137
+
138
+ class HeadingNodeFormatter < self
139
# Creates the element for a heading node and makes sure its 'class'
# attribute carries "h<level>", where the level is the nominal level of the
# node's first child. An existing, different class value is kept and the
# heading level is appended after a space.
def create_self_element(tree)
  element = super(tree)
  heading_level = "h#{tree.first.nominal_level}"
  existing = element['class']
  if !existing
    element['class'] = heading_level
  elsif existing != heading_level
    element['class'] = existing + " " + heading_level
  end
  element
end
146
+ end
147
+
148
+ class DescLeafFormatter < self
149
# Formats a description leaf as a list of children: the part before DescSep
# is formatted by the inherited #visit (this formatter's own element), and a
# DD element follows when the part after the separator is present and not
# empty.
def visit(tree)
  children = @generator::Children.new
  term, description = split_into_parts(tree.dup, DescSep)
  children.push super(term)
  if description && !description.empty?
    dd = @generator.create(DD)
    push_visited_results(dd, description)
    children.push dd
  end
  children
end
162
+ end
163
+
164
+ class TableCellNodeFormatter < self
165
# Formats a table cell. The element name is taken from the cell itself
# (tree.cell_type) and stored on the formatter before the inherited #visit
# runs; rowspan/colspan attributes are added only when greater than 1.
def visit(tree)
  @element_name = tree.cell_type
  element = super(tree)
  %w(rowspan colspan).each do |attribute|
    span = tree.public_send(attribute)
    element[attribute] = span if span > 1
  end
  element
end
172
+ end
173
+
174
+ class HeadingLeafFormatter < self
175
# Creates the heading element for +tree+: the element name is this
# formatter's name followed by the leaf's nominal level, and the upcased
# node id becomes the "id" attribute when the leaf has one.
def create_self_element(tree)
  tag_name = @element_name + tree.nominal_level.to_s
  element = @generator.create(tag_name)
  node_id = tree.node_id
  element["id"] = node_id.upcase if node_id
  element
end
180
+ end
181
+
182
+ class ListLeafNodeFormatter < self
183
# Creates the element for a list item wrapper and sets its "id" attribute to
# the upcased node id when the node has one.
def create_self_element(tree)
  element = super(tree)
  node_id = tree.node_id
  element["id"] = node_id.upcase if node_id
  element
end
188
+ end
189
+
190
# Node classes that need nothing beyond a plain element of the given name.
{
  # for InlineParser
  EmNode => EM,
  StrongNode => STRONG,
  DelNode => DEL,
  PluginNode => PLUGIN,
  # for BlockParser
  DescNode => DESC,
  QuoteNode => QUOTE,
  TableNode => TABLE,
  ParagraphNode => PARA,
  HrNode => HR,
  ListNode => UL,
  EnumNode => OL,
  TableLeaf => TR
}.each do |node_class, element|
  Formatter[node_class] = self.new(element)
end
203
+
204
+ #for InlineParser
205
+ ImgFormat = self.new(IMG)
206
+ Formatter[LinkNode] = LinkNodeFormatter.new(LINK)
207
+ Formatter[InlineLeaf] = InlineLeafFormatter.new(nil)
208
+ Formatter[PlainNode] = PlainNodeFormatter.new(PLAIN)
209
+ #for BlockParser
210
+ Formatter[VerbatimNode] = VerbatimNodeFormatter.new(VERB)
211
+ Formatter[CommentOutNode] = CommentOutNodeFormatter.new(nil)
212
+ Formatter[HeadingNode] = HeadingNodeFormatter.new(SECTION)
213
+ Formatter[DescLeaf] = DescLeafFormatter.new(DT)
214
+ Formatter[TableCellNode] = TableCellNodeFormatter.new(nil)
215
+ Formatter[HeadingLeaf] = HeadingLeafFormatter.new(HEADING)
216
+ Formatter[ListWrapNode] = ListLeafNodeFormatter.new(LI)
217
+ Formatter[EnumWrapNode] = ListLeafNodeFormatter.new(LI)
218
+ end
219
+
220
+ class XhtmlFormat < HtmlFormat
221
+ Formatter = HtmlFormat::Formatter.dup
222
+ setup_new_formatter(Formatter, XhtmlElement)
223
+ end
224
+
225
+ class Xhtml5Format < XhtmlFormat
226
+ Formatter = HtmlFormat::Formatter.dup
227
+ setup_new_formatter(Formatter, Xhtml5Element)
228
+ end
229
+ end