pseudohikiparser 0.0.3 → 0.0.4.develop

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,19 +5,16 @@ require 'pseudohiki/inlineparser'
5
5
 
6
6
  module PseudoHiki
7
7
  class BlockParser
8
- URI_RE = /(?:(?:https?|ftp|file):|mailto:)[A-Za-z0-9;\/?:@&=+$,\-_.!~*\'()#%]+/ #borrowed from hikidoc
9
8
  ID_TAG_PAT = /\A\[([^\[\]]+)\]/o
10
9
 
11
- module LINE_PAT
12
- VERBATIM_BEGIN = /\A<<<\s*/o
13
- VERBATIM_END = /\A>>>\s*/o
14
- PLUGIN_BEGIN = /\{\{/o
15
- PLUGIN_END = /\}\}/o
16
- end
10
+ VERBATIM_BEGIN = /\A<<<\s*/o
11
+ VERBATIM_END = /\A>>>\s*/o
12
+ PLUGIN_BEGIN = /\{\{/o
13
+ PLUGIN_END = /\}\}/o
17
14
 
18
- ParentNode = {}
15
+ PARENT_NODE = {}
19
16
 
20
- attr_reader :stack
17
+ attr_reader :stack, :auto_linker
21
18
 
22
19
  def self.assign_node_id(leaf, node)
23
20
  # return unless tree[0].kind_of? Array ** block_leaf:[inline_node:[token or inline_node]]
@@ -30,28 +27,35 @@ module PseudoHiki
30
27
  node
31
28
  end
32
29
 
33
- def self.parse(lines)
34
- parser = self.new
30
+ class << self
31
+ attr_accessor :auto_linker
32
+ end
33
+
34
+ def self.parse(lines, tmp_auto_linker=BlockParser.auto_linker)
35
+ parser = new(tmp_auto_linker)
35
36
  parser.read_lines(lines)
36
37
  parser.stack.tree
37
38
  end
38
39
 
39
40
  class BlockStack < TreeStack
40
- def pop
41
- self.current_node.parse_leafs
42
- super
41
+ attr_reader :stack
42
+
43
+ def pop_with_breaker(breaker=nil)
44
+ current_node.parse_leafs(breaker)
45
+ pop
46
+ end
47
+
48
+ def current_heading_level
49
+ i = @stack.rindex {|node| node.kind_of? BlockElement::HeadingNode }
50
+ @stack[i].level || 0
43
51
  end
44
52
  end
45
53
 
46
54
  class BlockLeaf < BlockStack::Leaf
47
- attr_accessor :nominal_level, :node_id
48
-
49
- def self.head_re=(head_regex)
50
- @self_head_re = head_regex
51
- end
55
+ attr_accessor :level, :node_id, :decorator
52
56
 
53
- def self.head_re
54
- @self_head_re
57
+ class << self
58
+ attr_accessor :head_re
55
59
  end
56
60
 
57
61
  def self.with_depth?
@@ -59,8 +63,8 @@ module PseudoHiki
59
63
  end
60
64
 
61
65
  def self.create(line, inline_parser=InlineParser)
62
- line = line.sub(self.head_re, "".freeze) if self.head_re
63
- new.concat(inline_parser.parse(line)) #leaf = self.new
66
+ line = line.sub(head_re, "".freeze) if head_re
67
+ new.concat(inline_parser.parse(line)) # leaf = self.new
64
68
  end
65
69
 
66
70
  def head_re
@@ -68,7 +72,7 @@ module PseudoHiki
68
72
  end
69
73
 
70
74
  def block
71
- @parent_node ||= ParentNode[self.class]
75
+ @parent_node ||= PARENT_NODE[self.class]
72
76
  end
73
77
 
74
78
  def push_block(stack)
@@ -76,7 +80,7 @@ module PseudoHiki
76
80
  end
77
81
 
78
82
  def under_appropriate_block?(stack)
79
- stack.current_node.kind_of? block and stack.current_node.nominal_level == nominal_level
83
+ stack.current_node.kind_of? block and stack.current_node.level == level
80
84
  end
81
85
 
82
86
  def push_self(stack)
@@ -84,10 +88,10 @@ module PseudoHiki
84
88
  super(stack)
85
89
  end
86
90
 
87
- def parse_leafs
88
- parsed = InlineParser.parse(self.join)
89
- self.clear
90
- self.concat(parsed)
91
+ def parse_leafs(breaker)
92
+ parsed = InlineParser.parse(join)
93
+ clear
94
+ concat(parsed)
91
95
  end
92
96
  end
93
97
 
@@ -95,8 +99,8 @@ module PseudoHiki
95
99
  include TreeStack::Mergeable
96
100
 
97
101
  def self.create(line)
98
- line = line.sub(self.head_re, "".freeze) if self.head_re
99
- self.new.tap {|leaf| leaf.push line }
102
+ line = line.sub(head_re, "".freeze) if head_re
103
+ new.tap {|leaf| leaf.push line }
100
104
  end
101
105
 
102
106
  def push_self(stack)
@@ -111,8 +115,8 @@ module PseudoHiki
111
115
 
112
116
  class NestedBlockLeaf < BlockLeaf
113
117
  def self.create(line)
114
- m = self.head_re.match(line)
115
- super(line).tap {|leaf| leaf.nominal_level = m[0].length }
118
+ m = head_re.match(line)
119
+ super(line).tap {|leaf| leaf.level = m[0].length }
116
120
  end
117
121
 
118
122
  def self.with_depth?
@@ -130,8 +134,12 @@ module PseudoHiki
130
134
  class BlockNode < BlockStack::Node
131
135
  attr_accessor :node_id
132
136
 
133
- def nominal_level
134
- first.nominal_level if first # @cached_nominal_level ||= (first.nominal_level if first)
137
+ def level
138
+ first.level if first # @cached_level ||= (first.level if first)
139
+ end
140
+
141
+ def decorator
142
+ first.decorator if first
135
143
  end
136
144
 
137
145
  def push_self(stack)
@@ -140,35 +148,27 @@ module PseudoHiki
140
148
  end
141
149
 
142
150
  def breakable?(breaker)
143
- not (kind_of?(breaker.block) and nominal_level == breaker.nominal_level)
151
+ not (kind_of? breaker.block and level == breaker.level)
144
152
  end
145
153
 
146
- def parse_leafs; end
147
-
148
- def in_link_tag?(preceding_str)
149
- preceding_str[-2, 2] == "[[".freeze or preceding_str[-1, 1] == "|".freeze
150
- end
151
-
152
- def tagfy_link(line)
153
- line.gsub(URI_RE) {|url| in_link_tag?($`) ? url : "[[#{url}]]" }
154
- end
154
+ def parse_leafs(breaker); end
155
155
 
156
156
  def add_leaf(line, blockparser)
157
157
  leaf = create_leaf(line, blockparser)
158
- blockparser.stack.pop while blockparser.breakable?(leaf)
158
+ blockparser.stack.pop_with_breaker(leaf) while blockparser.breakable?(leaf)
159
159
  blockparser.stack.push leaf
160
160
  end
161
161
 
162
162
  def create_leaf(line, blockparser)
163
- return BlockElement::VerbatimLeaf.create("".freeze, true) if LINE_PAT::VERBATIM_BEGIN =~ line
164
- line = tagfy_link(line) if URI_RE =~ line and BlockElement::VerbatimLeaf.head_re !~ line
163
+ return BlockElement::VerbatimLeaf.create("".freeze, true) if VERBATIM_BEGIN =~ line
164
+ line = blockparser.auto_linker.link(line)
165
165
  blockparser.select_leaf_type(line).create(line)
166
166
  end
167
167
  end
168
168
 
169
169
  class NonNestedBlockNode < BlockNode
170
- def parse_leafs
171
- self.each {|leaf| leaf.parse_leafs }
170
+ def parse_leafs(breaker)
171
+ each {|leaf| leaf.parse_leafs(breaker) }
172
172
  end
173
173
  end
174
174
 
@@ -176,99 +176,171 @@ module PseudoHiki
176
176
 
177
177
  class ListTypeBlockNode < NestedBlockNode
178
178
  def breakable?(breaker)
179
- not (breaker.block.superclass == ListTypeBlockNode and nominal_level <= breaker.nominal_level)
179
+ not (breaker.block.superclass == ListTypeBlockNode and level <= breaker.level)
180
180
  end
181
181
  end
182
182
 
183
183
  class ListLeafNode < NestedBlockNode
184
184
  def breakable?(breaker)
185
- not (breaker.kind_of?(ListTypeLeaf) and nominal_level < breaker.nominal_level)
185
+ not (breaker.kind_of? ListTypeLeaf and level < breaker.level)
186
186
  end
187
187
  end
188
188
 
189
+ class UnmatchedSectioningTagError < StandardError; end
190
+
189
191
  module BlockElement
190
192
  {
191
- BlockLeaf => %w(DescLeaf VerbatimLeaf TableLeaf CommentOutLeaf BlockNodeEnd HrLeaf),
193
+ BlockLeaf => %w(DescLeaf VerbatimLeaf TableLeaf CommentOutLeaf BlockNodeEnd HrLeaf DecoratorLeaf SectioningNodeEnd),
192
194
  NonNestedBlockLeaf => %w(QuoteLeaf ParagraphLeaf),
193
195
  NestedBlockLeaf => %w(HeadingLeaf),
194
196
  ListTypeLeaf => %w(ListLeaf EnumLeaf),
195
- BlockNode => %w(DescNode VerbatimNode TableNode CommentOutNode HrNode),
197
+ BlockNode => %w(DescNode VerbatimNode TableNode CommentOutNode HrNode DecoratorNode SectioningNode),
196
198
  NonNestedBlockNode => %w(QuoteNode ParagraphNode),
197
199
  NestedBlockNode => %w(HeadingNode),
198
200
  ListTypeBlockNode => %w(ListNode EnumNode),
199
201
  ListLeafNode => %w(ListWrapNode EnumWrapNode)
200
- }.each do |parent_class, children|
201
- PseudoHiki.subclass_of(parent_class, binding, children)
202
+ }.each do |parent_class, sub_classes|
203
+ sub_classes.each {|sub| const_set(sub, Class.new(parent_class)) }
202
204
  end
203
- end
204
- include BlockElement
205
205
 
206
- class BlockElement::BlockNodeEnd
207
- PARSED_NODE_END = new.concat(InlineParser.parse(""))
206
+ class BlockNodeEnd
207
+ PARSED_NODE_END = new.concat(InlineParser.parse(""))
208
208
 
209
- def push_self(stack); end
209
+ def push_self(stack); end
210
210
 
211
- def self.create(line, inline_parser=InlineParser)
212
- PARSED_NODE_END
211
+ def self.create(line, inline_parser=InlineParser)
212
+ PARSED_NODE_END
213
+ end
213
214
  end
214
- end
215
215
 
216
- class BlockElement::VerbatimNode
217
- attr_accessor :in_block_tag
216
+ class VerbatimNode
217
+ attr_accessor :in_block_tag
218
218
 
219
- def add_leaf(line, blockparser)
220
- return @stack.pop if LINE_PAT::VERBATIM_END =~ line
221
- return super(line, blockparser) unless @in_block_tag
222
- line = " ".concat(line) if BlockElement::BlockNodeEnd.head_re =~ line and not @in_block_tag
223
- @stack.push BlockElement::VerbatimLeaf.create(line, @in_block_tag)
219
+ def add_leaf(line, blockparser)
220
+ return @stack.pop_with_breaker if VERBATIM_END =~ line
221
+ return super(line, blockparser) unless @in_block_tag
222
+ line = " #{line}" if BlockNodeEnd.head_re =~ line and not @in_block_tag
223
+ @stack.push VerbatimLeaf.create(line, @in_block_tag)
224
+ end
224
225
  end
225
- end
226
226
 
227
- class BlockElement::QuoteNode
228
- def parse_leafs
229
- self[0] = BlockParser.parse(self[0])
227
+ class DecoratorNode
228
+ DECORATOR_PAT = /\A(?:([^\[\]:]+))?(?:\[([^\[\]]+)\])?(?::\s*(\S.*))?/o
229
+
230
+ class DecoratorItem < Struct.new(:string, :type, :id, :value)
231
+ def initialize(*args)
232
+ super
233
+ self.value = InlineParser.parse(self.value) if self.value
234
+ end
235
+ end
236
+
237
+ def parse_leafs(breaker)
238
+ decorator = {}
239
+ breaker.decorator = decorator
240
+ @stack.remove_current_node.each do |leaf|
241
+ m = DECORATOR_PAT.match(leaf.join)
242
+ return nil unless m
243
+ item = DecoratorItem.new(*(m.to_a))
244
+ decorator[item.type || :id] = item
245
+ end
246
+ end
247
+
248
+ def breakable?(breaker)
249
+ return super if breaker.kind_of? DecoratorLeaf
250
+ parse_leafs(breaker)
251
+ @stack.current_node.breakable?(breaker)
252
+ end
230
253
  end
231
- end
232
254
 
233
- class BlockElement::HeadingNode
234
- def breakable?(breaker)
235
- kind_of?(breaker.block) and nominal_level >= breaker.nominal_level
255
+ class DecoratorLeaf
256
+ def push_sectioning_node(stack, node_class)
257
+ node = node_class.new
258
+ m = DecoratorNode::DECORATOR_PAT.match(join)
259
+ node.node_id = m[2]
260
+ node.section_level = stack.current_heading_level if node.kind_of? SectioningNode
261
+ stack.push(node)
262
+ end
263
+
264
+ def push_self(stack)
265
+ decorator_type = self[0][0]
266
+ if decorator_type.start_with? "begin[".freeze
267
+ push_sectioning_node(stack, SectioningNode)
268
+ elsif decorator_type.start_with? "end[".freeze
269
+ push_sectioning_node(stack, SectioningNodeEnd)
270
+ else
271
+ super
272
+ end
273
+ end
236
274
  end
237
- end
238
275
 
239
- class BlockElement::VerbatimLeaf
240
- attr_accessor :in_block_tag
276
+ class SectioningNode
277
+ attr_accessor :section_level
241
278
 
242
- def self.create(line, in_block_tag=nil)
243
- line = line.sub(self.head_re, "".freeze) if self.head_re and not in_block_tag
244
- self.new.tap do |leaf|
245
- leaf.push line
246
- leaf.in_block_tag = in_block_tag
279
+ def breakable?(breaker)
280
+ breaker.kind_of? HeadingLeaf and @section_level >= breaker.level
247
281
  end
248
282
  end
249
283
 
250
- def push_block(stack)
251
- stack.push(block.new.tap {|n| n.in_block_tag = @in_block_tag })
284
+ class SectioningNodeEnd
285
+ def push_self(stack)
286
+ n = stack.stack.rindex do |node|
287
+ node.kind_of? SectioningNode and node.node_id == node_id
288
+ end
289
+ raise UnmatchedSectioningTagError unless n
290
+ stack.pop until stack.stack.length == n
291
+ rescue UnmatchedSectioningTagError => e
292
+ STDERR.puts "#{e}: The start tag for '#{node_id}' is not found."
293
+ # FIXME: The handling of this error should be changed appropriately.
294
+ end
252
295
  end
253
- end
254
296
 
255
- class BlockElement::TableLeaf
256
- def self.create(line)
257
- super(line, TableRowParser)
297
+ class QuoteNode
298
+ def parse_leafs(breaker)
299
+ self[0] = BlockParser.parse(self[0], AutoLink::Off)
300
+ end
301
+ end
302
+
303
+ class HeadingNode
304
+ def breakable?(breaker)
305
+ kind_of? breaker.block and level >= breaker.level
306
+ end
307
+ end
308
+
309
+ class VerbatimLeaf
310
+ attr_accessor :in_block_tag
311
+
312
+ def self.create(line, in_block_tag=nil)
313
+ line = line.sub(head_re, "".freeze) if head_re and not in_block_tag
314
+ new.tap do |leaf|
315
+ leaf.push line
316
+ leaf.in_block_tag = in_block_tag
317
+ end
318
+ end
319
+
320
+ def push_block(stack)
321
+ stack.push(block.new.tap {|n| n.in_block_tag = @in_block_tag })
322
+ end
323
+ end
324
+
325
+ class TableLeaf
326
+ def self.create(line)
327
+ super(line, TableRowParser)
328
+ end
258
329
  end
259
330
  end
331
+ include BlockElement
260
332
 
261
333
  class ListTypeLeaf
262
334
  include BlockElement
263
335
 
264
- Wrapper = {
336
+ WRAPPER = {
265
337
  ListLeaf => ListWrapNode,
266
338
  EnumLeaf => EnumWrapNode
267
339
  }
268
340
 
269
341
  def push_self(stack)
270
342
  push_block(stack) unless under_appropriate_block?(stack)
271
- stack.push Wrapper[self.class].new
343
+ stack.push WRAPPER[self.class].new
272
344
  BlockParser.assign_node_id(self[0], stack.current_node)
273
345
  stack.push_as_leaf self
274
346
  end
@@ -283,46 +355,51 @@ module PseudoHiki
283
355
  [ParagraphLeaf, ParagraphNode],
284
356
  [HrLeaf, HrNode],
285
357
  [ListLeaf, ListNode],
286
- [EnumLeaf, EnumNode]
358
+ [EnumLeaf, EnumNode],
359
+ [BlockNodeEnd, BlockNodeEnd], # special case
360
+ [DecoratorLeaf, DecoratorNode]
287
361
  ].each do |leaf, node|
288
- ParentNode[leaf] = node
362
+ PARENT_NODE[leaf] = node
289
363
  end
290
364
 
291
- ParentNode[BlockNodeEnd] = BlockNodeEnd
292
-
293
- def self.assign_head_re
294
- irregular_leafs = [BlockNodeEnd, VerbatimLeaf, HrLeaf]
295
- irregular_head_pats, regular_leaf_types, head_to_leaf = [], [], {}
296
- [['\r?\n?$', BlockNodeEnd],
297
- ['\s', VerbatimLeaf],
298
- ['*', ListLeaf],
299
- ['#', EnumLeaf],
300
- [':', DescLeaf],
301
- ['!', HeadingLeaf],
302
- ['""', QuoteLeaf],
303
- ['||', TableLeaf],
304
- ['//', CommentOutLeaf],
305
- ['----\s*$', HrLeaf]
306
- ].each do |head, leaf|
307
- escaped_head = irregular_leafs.include?(leaf) ? head : Regexp.escape(head)
365
+ head_to_leaf_table = [['\r?\n?$', BlockNodeEnd],
366
+ ['\s', VerbatimLeaf],
367
+ ['*', ListLeaf],
368
+ ['#', EnumLeaf],
369
+ [':', DescLeaf],
370
+ ['!', HeadingLeaf],
371
+ ['""', QuoteLeaf],
372
+ ['||', TableLeaf],
373
+ ['//@', DecoratorLeaf],
374
+ ['//', CommentOutLeaf],
375
+ ['----\s*$', HrLeaf]]
376
+
377
+ IRREGULAR_LEAFS = [:entire_matched_part, BlockNodeEnd, VerbatimLeaf, HrLeaf]
378
+ NUMBER_OF_IRREGULAR_LEAFS = IRREGULAR_LEAFS.length - 1
379
+ HEAD_TO_LEAF = head_to_leaf_table.inject({}) {|h, kv| h[kv[0]] = kv[1]; h }
380
+
381
+ def self.assign_head_re(head_to_leaf_table)
382
+ irregular_head_pats, regular_heads = [], []
383
+ head_to_leaf_table.each do |head, leaf|
384
+ leaf_is_irregular = IRREGULAR_LEAFS.include?(leaf)
385
+ escaped_head = leaf_is_irregular ? head : Regexp.escape(head)
308
386
  head_pat = leaf.with_depth? ? "#{escaped_head}+" : "#{escaped_head}"
309
- leaf.head_re = Regexp.new('\\A'+head_pat)
310
- head_to_leaf[head] = leaf
311
- irregular_head_pats.push "(#{escaped_head})" if irregular_leafs.include?(leaf)
312
- regular_leaf_types.push head unless irregular_leafs.include?(leaf)
387
+ leaf.head_re = /\A#{head_pat}/
388
+ irregular_head_pats.push "(#{escaped_head})" if leaf_is_irregular
389
+ regular_heads.push head unless leaf_is_irregular
313
390
  end
314
- irregular_leaf_types = [:entire_matched_part].concat(irregular_leafs)
315
- return Regexp.new('\\A(?:'+irregular_head_pats.join('|')+')'), regular_leaf_types, head_to_leaf, irregular_leaf_types, irregular_leafs.length
391
+ return /\A(?:#{irregular_head_pats.join('|')})/, regular_heads
316
392
  end
317
393
 
318
- IRREGULAR_HEAD_PAT, REGULAR_LEAF_TYPES, HEAD_TO_LEAF, IRREGULAR_LEAF_TYPES, NUMBER_OF_IRREGULAR_LEAF_TYPES = assign_head_re
394
+ IRREGULAR_HEAD_PAT, REGULAR_HEADS = assign_head_re(head_to_leaf_table)
319
395
 
320
- def initialize
396
+ def initialize(auto_linker=BlockParser.auto_linker)
321
397
  root_node = BlockNode.new
322
398
  def root_node.breakable?(breaker)
323
399
  false
324
400
  end
325
401
  @stack = BlockStack.new(root_node)
402
+ @auto_linker = auto_linker || AutoLink::URL
326
403
  end
327
404
 
328
405
  def breakable?(breaker)
@@ -331,15 +408,46 @@ module PseudoHiki
331
408
 
332
409
  def select_leaf_type(line)
333
410
  matched = IRREGULAR_HEAD_PAT.match(line)
334
- 1.upto(NUMBER_OF_IRREGULAR_LEAF_TYPES) {|i| return IRREGULAR_LEAF_TYPES[i] if matched[i] } if matched
335
- REGULAR_LEAF_TYPES.each {|head| return HEAD_TO_LEAF[head] if line.start_with?(head) }
411
+ 1.upto(NUMBER_OF_IRREGULAR_LEAFS) {|i| return IRREGULAR_LEAFS[i] if matched[i] } if matched
412
+ REGULAR_HEADS.each {|head| return HEAD_TO_LEAF[head] if line.start_with?(head) }
336
413
  ParagraphLeaf
337
414
  end
338
415
 
339
416
  def read_lines(lines)
340
417
  each_line = lines.respond_to?(:each_line) ? :each_line : :each
341
418
  lines.send(each_line) {|line| @stack.current_node.add_leaf(line, self) }
342
- @stack.pop
419
+ @stack.pop_with_breaker
420
+ end
421
+ end
422
+
423
+ module AutoLink
424
+ # URI_RE is borrowed from hikidoc
425
+ URI_RE = /(?:https?|ftp|file|mailto):[A-Za-z0-9;\/?:@&=+$,\-_.!~*\'()#%]+/
426
+ VERBATIM_LEAF_HEAD_RE = BlockParser::BlockElement::VerbatimLeaf.head_re
427
+
428
+ module Off
429
+ def self.link(line) line; end
430
+
431
+ def self.auto_link_url?
432
+ false
433
+ end
434
+ end
435
+
436
+ module URL
437
+ OPEN_TAG, LINK_SEP = "[[", "|"
438
+
439
+ def self.in_link_tag?(preceding_str)
440
+ preceding_str.end_with?(OPEN_TAG) or preceding_str.end_with?(LINK_SEP)
441
+ end
442
+
443
+ def self.link(line)
444
+ return line unless URI_RE =~ line and VERBATIM_LEAF_HEAD_RE !~ line
445
+ line.gsub(URI_RE) {|url| in_link_tag?($`) ? url : "[[#{url}]]" }
446
+ end
447
+
448
+ def self.auto_link_url?
449
+ true
450
+ end
343
451
  end
344
452
  end
345
453
  end