pseudohikiparser 0.0.3 → 0.0.4.develop

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,19 +5,16 @@ require 'pseudohiki/inlineparser'
5
5
 
6
6
  module PseudoHiki
7
7
  class BlockParser
8
- URI_RE = /(?:(?:https?|ftp|file):|mailto:)[A-Za-z0-9;\/?:@&=+$,\-_.!~*\'()#%]+/ #borrowed from hikidoc
9
8
  ID_TAG_PAT = /\A\[([^\[\]]+)\]/o
10
9
 
11
- module LINE_PAT
12
- VERBATIM_BEGIN = /\A<<<\s*/o
13
- VERBATIM_END = /\A>>>\s*/o
14
- PLUGIN_BEGIN = /\{\{/o
15
- PLUGIN_END = /\}\}/o
16
- end
10
+ VERBATIM_BEGIN = /\A<<<\s*/o
11
+ VERBATIM_END = /\A>>>\s*/o
12
+ PLUGIN_BEGIN = /\{\{/o
13
+ PLUGIN_END = /\}\}/o
17
14
 
18
- ParentNode = {}
15
+ PARENT_NODE = {}
19
16
 
20
- attr_reader :stack
17
+ attr_reader :stack, :auto_linker
21
18
 
22
19
  def self.assign_node_id(leaf, node)
23
20
  # return unless tree[0].kind_of? Array ** block_leaf:[inline_node:[token or inline_node]]
@@ -30,28 +27,35 @@ module PseudoHiki
30
27
  node
31
28
  end
32
29
 
33
- def self.parse(lines)
34
- parser = self.new
30
+ class << self
31
+ attr_accessor :auto_linker
32
+ end
33
+
34
# Parses +lines+ with a newly created parser and returns the tree held by
# that parser's stack.
#
# tmp_auto_linker is handed to +new+; when omitted it defaults to
# BlockParser.auto_linker.
def self.parse(lines, tmp_auto_linker=BlockParser.auto_linker)
  new(tmp_auto_linker).tap {|block_parser| block_parser.read_lines(lines) }.stack.tree
end
38
39
 
39
40
  class BlockStack < TreeStack
40
- def pop
41
- self.current_node.parse_leafs
42
- super
41
+ attr_reader :stack
42
+
43
# Asks the current node to parse its leafs (forwarding +breaker+, which may
# be nil) and then pops the stack, returning whatever +pop+ returns.
def pop_with_breaker(breaker=nil)
  node_to_close = current_node
  node_to_close.parse_leafs(breaker)
  pop
end
47
+
48
# Returns the level of the last HeadingNode found on @stack.
#
# Returns 0 when @stack holds no HeadingNode at all, or when the heading
# found reports a nil level. (Without the explicit guard, the nil returned
# by rindex would be used as an array index and raise TypeError.)
def current_heading_level
  index = @stack.rindex {|node| node.kind_of? BlockElement::HeadingNode }
  return 0 unless index
  @stack[index].level || 0
end
44
52
  end
45
53
 
46
54
  class BlockLeaf < BlockStack::Leaf
47
- attr_accessor :nominal_level, :node_id
48
-
49
- def self.head_re=(head_regex)
50
- @self_head_re = head_regex
51
- end
55
+ attr_accessor :level, :node_id, :decorator
52
56
 
53
- def self.head_re
54
- @self_head_re
57
+ class << self
58
+ attr_accessor :head_re
55
59
  end
56
60
 
57
61
  def self.with_depth?
@@ -59,8 +63,8 @@ module PseudoHiki
59
63
  end
60
64
 
61
65
  def self.create(line, inline_parser=InlineParser)
62
- line = line.sub(self.head_re, "".freeze) if self.head_re
63
- new.concat(inline_parser.parse(line)) #leaf = self.new
66
+ line = line.sub(head_re, "".freeze) if head_re
67
+ new.concat(inline_parser.parse(line)) # leaf = self.new
64
68
  end
65
69
 
66
70
  def head_re
@@ -68,7 +72,7 @@ module PseudoHiki
68
72
  end
69
73
 
70
74
  def block
71
- @parent_node ||= ParentNode[self.class]
75
+ @parent_node ||= PARENT_NODE[self.class]
72
76
  end
73
77
 
74
78
  def push_block(stack)
@@ -76,7 +80,7 @@ module PseudoHiki
76
80
  end
77
81
 
78
82
  def under_appropriate_block?(stack)
79
- stack.current_node.kind_of? block and stack.current_node.nominal_level == nominal_level
83
+ stack.current_node.kind_of? block and stack.current_node.level == level
80
84
  end
81
85
 
82
86
  def push_self(stack)
@@ -84,10 +88,10 @@ module PseudoHiki
84
88
  super(stack)
85
89
  end
86
90
 
87
- def parse_leafs
88
- parsed = InlineParser.parse(self.join)
89
- self.clear
90
- self.concat(parsed)
91
# Replaces this leaf's contents with the result of running InlineParser
# over the joined contents. +breaker+ is accepted but not used here.
def parse_leafs(breaker)
  inline_nodes = InlineParser.parse(join)
  clear
  concat(inline_nodes)
end
92
96
  end
93
97
 
@@ -95,8 +99,8 @@ module PseudoHiki
95
99
  include TreeStack::Mergeable
96
100
 
97
101
  def self.create(line)
98
- line = line.sub(self.head_re, "".freeze) if self.head_re
99
- self.new.tap {|leaf| leaf.push line }
102
+ line = line.sub(head_re, "".freeze) if head_re
103
+ new.tap {|leaf| leaf.push line }
100
104
  end
101
105
 
102
106
  def push_self(stack)
@@ -111,8 +115,8 @@ module PseudoHiki
111
115
 
112
116
  class NestedBlockLeaf < BlockLeaf
113
117
  def self.create(line)
114
- m = self.head_re.match(line)
115
- super(line).tap {|leaf| leaf.nominal_level = m[0].length }
118
+ m = head_re.match(line)
119
+ super(line).tap {|leaf| leaf.level = m[0].length }
116
120
  end
117
121
 
118
122
  def self.with_depth?
@@ -130,8 +134,12 @@ module PseudoHiki
130
134
  class BlockNode < BlockStack::Node
131
135
  attr_accessor :node_id
132
136
 
133
- def nominal_level
134
- first.nominal_level if first # @cached_nominal_level ||= (first.nominal_level if first)
137
# Level of this node, taken from its first element; nil when there is no
# first element.
# (Original note: @cached_level ||= (first.level if first))
def level
  head = first
  head ? head.level : nil
end
140
+
141
# Decorator of this node, taken from its first element; nil when there is
# no first element.
def decorator
  head = first
  head ? head.decorator : nil
end
136
144
 
137
145
  def push_self(stack)
@@ -140,35 +148,27 @@ module PseudoHiki
140
148
  end
141
149
 
142
150
  def breakable?(breaker)
143
- not (kind_of?(breaker.block) and nominal_level == breaker.nominal_level)
151
+ not (kind_of? breaker.block and level == breaker.level)
144
152
  end
145
153
 
146
- def parse_leafs; end
147
-
148
- def in_link_tag?(preceding_str)
149
- preceding_str[-2, 2] == "[[".freeze or preceding_str[-1, 1] == "|".freeze
150
- end
151
-
152
- def tagfy_link(line)
153
- line.gsub(URI_RE) {|url| in_link_tag?($`) ? url : "[[#{url}]]" }
154
- end
154
+ def parse_leafs(breaker); end
155
155
 
156
156
  def add_leaf(line, blockparser)
157
157
  leaf = create_leaf(line, blockparser)
158
- blockparser.stack.pop while blockparser.breakable?(leaf)
158
+ blockparser.stack.pop_with_breaker(leaf) while blockparser.breakable?(leaf)
159
159
  blockparser.stack.push leaf
160
160
  end
161
161
 
162
162
  def create_leaf(line, blockparser)
163
- return BlockElement::VerbatimLeaf.create("".freeze, true) if LINE_PAT::VERBATIM_BEGIN =~ line
164
- line = tagfy_link(line) if URI_RE =~ line and BlockElement::VerbatimLeaf.head_re !~ line
163
+ return BlockElement::VerbatimLeaf.create("".freeze, true) if VERBATIM_BEGIN =~ line
164
+ line = blockparser.auto_linker.link(line)
165
165
  blockparser.select_leaf_type(line).create(line)
166
166
  end
167
167
  end
168
168
 
169
169
  class NonNestedBlockNode < BlockNode
170
- def parse_leafs
171
- self.each {|leaf| leaf.parse_leafs }
170
# Forwards parse_leafs (with the same +breaker+) to every leaf held by this
# node.
def parse_leafs(breaker)
  each do |leaf|
    leaf.parse_leafs(breaker)
  end
end
173
173
  end
174
174
 
@@ -176,99 +176,171 @@ module PseudoHiki
176
176
 
177
177
  class ListTypeBlockNode < NestedBlockNode
178
178
  def breakable?(breaker)
179
- not (breaker.block.superclass == ListTypeBlockNode and nominal_level <= breaker.nominal_level)
179
+ not (breaker.block.superclass == ListTypeBlockNode and level <= breaker.level)
180
180
  end
181
181
  end
182
182
 
183
183
  class ListLeafNode < NestedBlockNode
184
184
  def breakable?(breaker)
185
- not (breaker.kind_of?(ListTypeLeaf) and nominal_level < breaker.nominal_level)
185
+ not (breaker.kind_of? ListTypeLeaf and level < breaker.level)
186
186
  end
187
187
  end
188
188
 
189
+ class UnmatchedSectioningTagError < StandardError; end
190
+
189
191
  module BlockElement
190
192
  {
191
- BlockLeaf => %w(DescLeaf VerbatimLeaf TableLeaf CommentOutLeaf BlockNodeEnd HrLeaf),
193
+ BlockLeaf => %w(DescLeaf VerbatimLeaf TableLeaf CommentOutLeaf BlockNodeEnd HrLeaf DecoratorLeaf SectioningNodeEnd),
192
194
  NonNestedBlockLeaf => %w(QuoteLeaf ParagraphLeaf),
193
195
  NestedBlockLeaf => %w(HeadingLeaf),
194
196
  ListTypeLeaf => %w(ListLeaf EnumLeaf),
195
- BlockNode => %w(DescNode VerbatimNode TableNode CommentOutNode HrNode),
197
+ BlockNode => %w(DescNode VerbatimNode TableNode CommentOutNode HrNode DecoratorNode SectioningNode),
196
198
  NonNestedBlockNode => %w(QuoteNode ParagraphNode),
197
199
  NestedBlockNode => %w(HeadingNode),
198
200
  ListTypeBlockNode => %w(ListNode EnumNode),
199
201
  ListLeafNode => %w(ListWrapNode EnumWrapNode)
200
- }.each do |parent_class, children|
201
- PseudoHiki.subclass_of(parent_class, binding, children)
202
+ }.each do |parent_class, sub_classes|
203
+ sub_classes.each {|sub| const_set(sub, Class.new(parent_class)) }
202
204
  end
203
- end
204
- include BlockElement
205
205
 
206
- class BlockElement::BlockNodeEnd
207
- PARSED_NODE_END = new.concat(InlineParser.parse(""))
206
+ class BlockNodeEnd
207
+ PARSED_NODE_END = new.concat(InlineParser.parse(""))
208
208
 
209
- def push_self(stack); end
209
+ def push_self(stack); end
210
210
 
211
- def self.create(line, inline_parser=InlineParser)
212
- PARSED_NODE_END
211
+ def self.create(line, inline_parser=InlineParser)
212
+ PARSED_NODE_END
213
+ end
213
214
  end
214
- end
215
215
 
216
- class BlockElement::VerbatimNode
217
- attr_accessor :in_block_tag
216
+ class VerbatimNode
217
+ attr_accessor :in_block_tag
218
218
 
219
- def add_leaf(line, blockparser)
220
- return @stack.pop if LINE_PAT::VERBATIM_END =~ line
221
- return super(line, blockparser) unless @in_block_tag
222
- line = " ".concat(line) if BlockElement::BlockNodeEnd.head_re =~ line and not @in_block_tag
223
- @stack.push BlockElement::VerbatimLeaf.create(line, @in_block_tag)
219
+ def add_leaf(line, blockparser)
220
+ return @stack.pop_with_breaker if VERBATIM_END =~ line
221
+ return super(line, blockparser) unless @in_block_tag
222
+ line = " #{line}" if BlockNodeEnd.head_re =~ line and not @in_block_tag
223
+ @stack.push VerbatimLeaf.create(line, @in_block_tag)
224
+ end
224
225
  end
225
- end
226
226
 
227
- class BlockElement::QuoteNode
228
- def parse_leafs
229
- self[0] = BlockParser.parse(self[0])
227
class DecoratorNode
  # Three optional captures, in order: a type (no "[", "]" or ":"),
  # a bracketed id, and a value following ":" and optional whitespace.
  DECORATOR_PAT = /\A(?:([^\[\]:]+))?(?:\[([^\[\]]+)\])?(?::\s*(\S.*))?/o

  # One decorator line: the whole matched string plus its type, id and value.
  # A non-nil value is replaced by the result of InlineParser.parse.
  class DecoratorItem < Struct.new(:string, :type, :id, :value)
    def initialize(*args)
      super
      raw_value = value
      self.value = InlineParser.parse(raw_value) if raw_value
    end
  end

  # Removes the current node from @stack, turns each of its leafs into a
  # DecoratorItem and stores the items in a hash assigned to
  # breaker.decorator. Items are keyed by their type, or by :id when the
  # type is missing.
  def parse_leafs(breaker)
    items = {}
    breaker.decorator = items
    @stack.remove_current_node.each do |leaf|
      matched = DECORATOR_PAT.match(leaf.join)
      return nil unless matched
      item = DecoratorItem.new(*matched.to_a)
      items[item.type || :id] = item
    end
  end

  # For a DecoratorLeaf breaker the inherited check is used. For any other
  # breaker the collected decorators are attached to it first, and the
  # question is then passed on to the node that is current afterwards.
  def breakable?(breaker)
    if breaker.kind_of? DecoratorLeaf
      super
    else
      parse_leafs(breaker)
      @stack.current_node.breakable?(breaker)
    end
  end
end
231
- end
232
254
 
233
- class BlockElement::HeadingNode
234
- def breakable?(breaker)
235
- kind_of?(breaker.block) and nominal_level >= breaker.nominal_level
255
class DecoratorLeaf
  # Creates a node of +node_class+, gives it the bracketed id found in this
  # leaf's joined text, and pushes it onto +stack+. A SectioningNode also
  # receives the stack's current heading level as its section level.
  def push_sectioning_node(stack, node_class)
    section = node_class.new
    section.node_id = DecoratorNode::DECORATOR_PAT.match(join)[2]
    section.section_level = stack.current_heading_level if section.kind_of? SectioningNode
    stack.push(section)
  end

  # A leaf whose first token starts with "begin[" opens a section and one
  # starting with "end[" closes it; anything else is pushed the inherited way.
  def push_self(stack)
    head = self[0][0]
    return push_sectioning_node(stack, SectioningNode) if head.start_with? "begin[".freeze
    return push_sectioning_node(stack, SectioningNodeEnd) if head.start_with? "end[".freeze
    super
  end
end
237
- end
238
275
 
239
- class BlockElement::VerbatimLeaf
240
- attr_accessor :in_block_tag
276
+ class SectioningNode
277
+ attr_accessor :section_level
241
278
 
242
- def self.create(line, in_block_tag=nil)
243
- line = line.sub(self.head_re, "".freeze) if self.head_re and not in_block_tag
244
- self.new.tap do |leaf|
245
- leaf.push line
246
- leaf.in_block_tag = in_block_tag
279
+ def breakable?(breaker)
280
+ breaker.kind_of? HeadingLeaf and @section_level >= breaker.level
247
281
  end
248
282
  end
249
283
 
250
- def push_block(stack)
251
- stack.push(block.new.tap {|n| n.in_block_tag = @in_block_tag })
284
class SectioningNodeEnd
  # Closes the section opened by the last SectioningNode on the stack whose
  # node_id equals this node's node_id: the stack is popped until its length
  # equals the index of that node.
  #
  # When no such node exists, nothing is popped and a message is written to
  # STDERR. The unmatched case is handled with a plain guard instead of
  # raising and rescuing UnmatchedSectioningTagError inside the same method,
  # so an error of that class coming out of stack.pop is no longer swallowed.
  def push_self(stack)
    n = stack.stack.rindex do |node|
      node.kind_of? SectioningNode and node.node_id == node_id
    end
    unless n
      # FIXME: The handling of this error should be changed appropriately.
      STDERR.puts "#{UnmatchedSectioningTagError}: The start tag for '#{node_id}' is not found."
      return
    end
    stack.pop until stack.stack.length == n
  end
end
253
- end
254
296
 
255
- class BlockElement::TableLeaf
256
- def self.create(line)
257
- super(line, TableRowParser)
297
class QuoteNode
  # Re-parses the first element as block-level markup with auto-linking
  # switched off, and stores the resulting tree back in its place.
  # +breaker+ is accepted but not used here.
  def parse_leafs(breaker)
    quoted_lines = self[0]
    self[0] = BlockParser.parse(quoted_lines, AutoLink::Off)
  end
end
302
+
303
class HeadingNode
  # True only when this node is an instance of the breaker's block class and
  # its level is greater than or equal to the breaker's level.
  def breakable?(breaker)
    return false unless kind_of? breaker.block
    level >= breaker.level
  end
end
308
+
309
class VerbatimLeaf
  attr_accessor :in_block_tag

  # Builds a leaf holding +line+ and remembers +in_block_tag+ on it.
  # The head pattern is stripped from the line only when a head_re is set
  # and the line is not inside a block tag.
  def self.create(line, in_block_tag=nil)
    strip_head = head_re && !in_block_tag
    content = strip_head ? line.sub(head_re, "".freeze) : line
    leaf = new
    leaf.push content
    leaf.in_block_tag = in_block_tag
    leaf
  end

  # Pushes a new instance of this leaf's block class onto +stack+, carrying
  # over this leaf's in_block_tag flag.
  def push_block(stack)
    verbatim_node = block.new
    verbatim_node.in_block_tag = @in_block_tag
    stack.push(verbatim_node)
  end
end
324
+
325
class TableLeaf
  # Delegates to the inherited create, passing TableRowParser as the second
  # argument in place of the inherited default.
  def self.create(line)
    row_parser = TableRowParser
    super(line, row_parser)
  end
end
259
330
  end
331
+ include BlockElement
260
332
 
261
333
  class ListTypeLeaf
262
334
  include BlockElement
263
335
 
264
- Wrapper = {
336
+ WRAPPER = {
265
337
  ListLeaf => ListWrapNode,
266
338
  EnumLeaf => EnumWrapNode
267
339
  }
268
340
 
269
341
  def push_self(stack)
270
342
  push_block(stack) unless under_appropriate_block?(stack)
271
- stack.push Wrapper[self.class].new
343
+ stack.push WRAPPER[self.class].new
272
344
  BlockParser.assign_node_id(self[0], stack.current_node)
273
345
  stack.push_as_leaf self
274
346
  end
@@ -283,46 +355,51 @@ module PseudoHiki
283
355
  [ParagraphLeaf, ParagraphNode],
284
356
  [HrLeaf, HrNode],
285
357
  [ListLeaf, ListNode],
286
- [EnumLeaf, EnumNode]
358
+ [EnumLeaf, EnumNode],
359
+ [BlockNodeEnd, BlockNodeEnd], # special case
360
+ [DecoratorLeaf, DecoratorNode]
287
361
  ].each do |leaf, node|
288
- ParentNode[leaf] = node
362
+ PARENT_NODE[leaf] = node
289
363
  end
290
364
 
291
- ParentNode[BlockNodeEnd] = BlockNodeEnd
292
-
293
- def self.assign_head_re
294
- irregular_leafs = [BlockNodeEnd, VerbatimLeaf, HrLeaf]
295
- irregular_head_pats, regular_leaf_types, head_to_leaf = [], [], {}
296
- [['\r?\n?$', BlockNodeEnd],
297
- ['\s', VerbatimLeaf],
298
- ['*', ListLeaf],
299
- ['#', EnumLeaf],
300
- [':', DescLeaf],
301
- ['!', HeadingLeaf],
302
- ['""', QuoteLeaf],
303
- ['||', TableLeaf],
304
- ['//', CommentOutLeaf],
305
- ['----\s*$', HrLeaf]
306
- ].each do |head, leaf|
307
- escaped_head = irregular_leafs.include?(leaf) ? head : Regexp.escape(head)
365
+ head_to_leaf_table = [['\r?\n?$', BlockNodeEnd],
366
+ ['\s', VerbatimLeaf],
367
+ ['*', ListLeaf],
368
+ ['#', EnumLeaf],
369
+ [':', DescLeaf],
370
+ ['!', HeadingLeaf],
371
+ ['""', QuoteLeaf],
372
+ ['||', TableLeaf],
373
+ ['//@', DecoratorLeaf],
374
+ ['//', CommentOutLeaf],
375
+ ['----\s*$', HrLeaf]]
376
+
377
+ IRREGULAR_LEAFS = [:entire_matched_part, BlockNodeEnd, VerbatimLeaf, HrLeaf]
378
+ NUMBER_OF_IRREGULAR_LEAFS = IRREGULAR_LEAFS.length - 1
379
+ HEAD_TO_LEAF = head_to_leaf_table.inject({}) {|h, kv| h[kv[0]] = kv[1]; h }
380
+
381
+ def self.assign_head_re(head_to_leaf_table)
382
+ irregular_head_pats, regular_heads = [], []
383
+ head_to_leaf_table.each do |head, leaf|
384
+ leaf_is_irregular = IRREGULAR_LEAFS.include?(leaf)
385
+ escaped_head = leaf_is_irregular ? head : Regexp.escape(head)
308
386
  head_pat = leaf.with_depth? ? "#{escaped_head}+" : "#{escaped_head}"
309
- leaf.head_re = Regexp.new('\\A'+head_pat)
310
- head_to_leaf[head] = leaf
311
- irregular_head_pats.push "(#{escaped_head})" if irregular_leafs.include?(leaf)
312
- regular_leaf_types.push head unless irregular_leafs.include?(leaf)
387
+ leaf.head_re = /\A#{head_pat}/
388
+ irregular_head_pats.push "(#{escaped_head})" if leaf_is_irregular
389
+ regular_heads.push head unless leaf_is_irregular
313
390
  end
314
- irregular_leaf_types = [:entire_matched_part].concat(irregular_leafs)
315
- return Regexp.new('\\A(?:'+irregular_head_pats.join('|')+')'), regular_leaf_types, head_to_leaf, irregular_leaf_types, irregular_leafs.length
391
+ return /\A(?:#{irregular_head_pats.join('|')})/, regular_heads
316
392
  end
317
393
 
318
- IRREGULAR_HEAD_PAT, REGULAR_LEAF_TYPES, HEAD_TO_LEAF, IRREGULAR_LEAF_TYPES, NUMBER_OF_IRREGULAR_LEAF_TYPES = assign_head_re
394
+ IRREGULAR_HEAD_PAT, REGULAR_HEADS = assign_head_re(head_to_leaf_table)
319
395
 
320
- def initialize
396
# Sets up the parser with a BlockStack whose root node never reports itself
# as breakable, and records the auto linker to use.
#
# auto_linker defaults to BlockParser.auto_linker; when that is nil (or
# false) AutoLink::URL is used instead.
def initialize(auto_linker=BlockParser.auto_linker)
  root_node = BlockNode.new
  class << root_node
    def breakable?(breaker)
      false
    end
  end
  @stack = BlockStack.new(root_node)
  @auto_linker = auto_linker || AutoLink::URL
end
327
404
 
328
405
  def breakable?(breaker)
@@ -331,15 +408,46 @@ module PseudoHiki
331
408
 
332
409
  def select_leaf_type(line)
333
410
  matched = IRREGULAR_HEAD_PAT.match(line)
334
- 1.upto(NUMBER_OF_IRREGULAR_LEAF_TYPES) {|i| return IRREGULAR_LEAF_TYPES[i] if matched[i] } if matched
335
- REGULAR_LEAF_TYPES.each {|head| return HEAD_TO_LEAF[head] if line.start_with?(head) }
411
+ 1.upto(NUMBER_OF_IRREGULAR_LEAFS) {|i| return IRREGULAR_LEAFS[i] if matched[i] } if matched
412
+ REGULAR_HEADS.each {|head| return HEAD_TO_LEAF[head] if line.start_with?(head) }
336
413
  ParagraphLeaf
337
414
  end
338
415
 
339
416
  def read_lines(lines)
340
417
  each_line = lines.respond_to?(:each_line) ? :each_line : :each
341
418
  lines.send(each_line) {|line| @stack.current_node.add_leaf(line, self) }
342
- @stack.pop
419
+ @stack.pop_with_breaker
420
+ end
421
+ end
422
+
423
+ module AutoLink
424
+ # URI_RE is borrowed from hikidoc
425
+ URI_RE = /(?:https?|ftp|file|mailto):[A-Za-z0-9;\/?:@&=+$,\-_.!~*\'()#%]+/
426
+ VERBATIM_LEAF_HEAD_RE = BlockParser::BlockElement::VerbatimLeaf.head_re
427
+
428
# Auto linker that leaves every line untouched.
module Off
  class << self
    # Returns +line+ itself, unchanged.
    def link(line)
      line
    end

    def auto_link_url?
      false
    end
  end
end
435
+
436
# Auto linker that wraps bare URLs in "[[" and "]]".
module URL
  OPEN_TAG = "[["
  LINK_SEP = "|"

  # True when +preceding_str+ ends with the link opening tag or with the
  # link separator, i.e. the URL that follows already sits inside a link.
  def self.in_link_tag?(preceding_str)
    preceding_str.end_with?(OPEN_TAG, LINK_SEP)
  end

  # Returns +line+ with every URL that is not already inside a link tag
  # wrapped in "[[...]]". Lines without a URL, and lines matching the
  # verbatim-leaf head pattern, are returned as they are.
  def self.link(line)
    return line if URI_RE !~ line or VERBATIM_LEAF_HEAD_RE =~ line
    line.gsub(URI_RE) do |url|
      in_link_tag?(Regexp.last_match.pre_match) ? url : "[[#{url}]]"
    end
  end

  def self.auto_link_url?
    true
  end
end
344
452
  end
345
453
  end