pseudohikiparser 0.0.0.6.develop → 0.0.0.7.develop

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,369 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudohiki/inlineparser'
4
+ require 'pseudohiki/blockparser'
5
+ require 'pseudohiki/htmlformat'
6
+ require 'pseudohiki/plaintextformat'
7
+ require 'htmlelement'
8
+ require 'ostruct'
9
+
10
module PseudoHiki
  # Visitor that renders a parsed PseudoHiki tree as Markdown text.
  #
  # Every instance shares one +formatter+ table (node class => visitor object)
  # and keeps its own OpenStruct of options. MarkDownFormat.create fills the
  # table and returns the main formatter; #format turns a tree into a String.
  class MarkDownFormat
    include InlineParser::InlineElement
    include TableRowParser::InlineElement
    include BlockParser::BlockElement

    # @param formatter [Hash] table that maps node classes to visitor objects
    # @param options [Hash] option flags (:strict_mode, :gfm_style)
    # @yieldreturn [Hash] additional options that override +options+
    def initialize(formatter={}, options={ :strict_mode => false, :gfm_style => false })
      @formatter = formatter
      # Hash#merge (not merge!) so the Hash handed in by the caller, which
      # MarkDownFormat.create shares between all formatters, is never modified.
      options = options.merge(yield) if block_given?
      @options = OpenStruct.new(options)
    end

    # Returns the empty container into which visited child nodes are pushed.
    def create_self_element(tree=nil)
      HtmlElement::Children.new
    end

    # Lets +node+ accept the visitor registered for its class, falling back to
    # the visitor registered for PlainNode.
    def visited_result(node)
      visitor = @formatter[node.class] || @formatter[PlainNode]
      node.accept(visitor)
    end

    # Visits every child of +tree+ and appends each result to +element+.
    def push_visited_results(element, tree)
      tree.each { |token| element.push visited_result(token) }
    end

    # Default visit: a fresh container holding the results of all children.
    def visit(tree)
      element = create_self_element(tree)
      push_visited_results(element, tree)
      element
    end

    # Returns the visitor registered for PlainNode.
    def get_plain
      @formatter[PlainNode]
    end

    # Converts +tree+ with the PlainNode visitor and joins the result.
    def format(tree)
      tree.accept(get_plain).join
    end

    # Builds the marker that starts a list item: +mark+, followed by a space
    # unless the item text already begins with one, indented by two spaces per
    # nesting level below the first.
    def list_mark(tree, mark)
      mark = mark.dup
      mark << " " if /^ / !~ tree.join
      " " * (tree.nominal_level - 1) * 2 + mark
    end

    # Wraps +element+ in +mark+ by adding it at both ends (in place).
    def enclose_in(element, mark)
      element.push mark
      element.unshift mark
    end

    # Removes every line break that does not directly follow a ">" and ends the
    # string with a single line separator.
    def remove_trailing_newlines_in_html_element(element)
      element.to_s.gsub(/([^>])\r?\n/, "\\1") << $/
    end

    # Builds the formatter table and returns the main formatter.
    #
    # The main formatter is the table's default value, so node classes without
    # an entry below (PlainNode, InlineNode, PluginNode, DescLeaf,
    # TableCellNode, QuoteLeaf, TableLeaf, ParagraphLeaf, BlockNodeEnd,
    # ListLeaf, EnumLeaf, CommentOutNode, HrNode) are handled by it.
    #
    # @param options [Hash] option flags passed on to every formatter
    # @return [MarkDownFormat] the main formatter
    def self.create(options={ :strict_mode => false })
      formatter = {}
      main_formatter = new(formatter, options)
      formatter.default = main_formatter

      {
        InlineLeaf => InlineLeafFormatter,
        LinkNode => LinkNodeFormatter,
        EmNode => EmNodeFormatter,
        StrongNode => StrongNodeFormatter,
        DelNode => DelNodeFormatter,
        VerbatimLeaf => VerbatimLeafFormatter,
        CommentOutLeaf => CommentOutLeafFormatter,
        HeadingLeaf => HeadingLeafFormatter,
        HrLeaf => HrLeafFormatter,
        DescNode => DescNodeFormatter,
        VerbatimNode => VerbatimNodeFormatter,
        QuoteNode => QuoteNodeFormatter,
        TableNode => TableNodeFormatter,
        HeadingNode => HeadingNodeFormatter,
        ParagraphNode => ParagraphNodeFormatter,
        ListNode => ListNodeFormatter,
        EnumNode => EnumNodeFormatter,
        ListWrapNode => ListWrapNodeFormatter,
        EnumWrapNode => EnumWrapNodeFormatter
      }.each do |node_class, formatter_class|
        formatter[node_class] = formatter_class.new(formatter, options)
      end

      main_formatter
    end

    ## Definitions of subclasses of MarkDownFormat begin here.

    # Escapes "_" and "*" with a backslash in every string of the leaf.
    class InlineLeafFormatter < self
      def visit(leaf)
        leaf.map { |str| str.gsub(/([_*])/, "\\\\\\1") }
      end
    end

    # Renders a link as "[caption](uri)", prefixed with "!" when the uri
    # matches ImageSuffix.
    class LinkNodeFormatter < self
      def visit(tree)
        tree = tree.dup
        element = create_self_element
        caption = get_caption(tree)
        # An empty tree means that no uri was given: report it and continue
        # instead of provoking and rescuing a NoMethodError.
        if tree.empty?
          STDERR.puts "No uri is specified for #{caption}"
          ref = nil
        else
          ref = tree.last.join
        end
        element.push "!" if ImageSuffix =~ ref
        element.push "[#{(caption || tree).join}](#{tree.join})"
        element
      end

      # Removes the caption part and the separator from +tree+ (in place) and
      # returns the visited caption, or nil when +tree+ has no [LinkSep].
      def get_caption(tree)
        link_sep_index = tree.find_index([LinkSep])
        return nil unless link_sep_index
        caption_part = tree.shift(link_sep_index)
        tree.shift
        caption_part.map { |element| visited_result(element) }
      end
    end

    # Encloses the visited children in "_".
    class EmNodeFormatter < self
      def visit(tree)
        super(tree).tap { |element| enclose_in(element, "_") }
      end
    end

    # Encloses the visited children in "**".
    class StrongNodeFormatter < self
      def visit(tree)
        super(tree).tap { |element| enclose_in(element, "**") }
      end
    end

    # Returns the stripped text of the visited children enclosed in "~~".
    class DelNodeFormatter < self
      def visit(tree)
        "~~#{super(tree).join.strip}~~"
      end
    end

    # Joins the leaf without escaping any character.
    class VerbatimLeafFormatter < InlineLeafFormatter
      def visit(leaf)
        leaf.join
      end
    end

    # Produces no output.
    class CommentOutLeafFormatter < self
      def visit(tree); ""; end
    end

    # Appends a line separator to the visited children.
    class HeadingLeafFormatter < self
      def visit(tree)
        super(tree).tap { |element| element.push $/ }
      end
    end

    # Renders a horizontal rule.
    class HrLeafFormatter < self
      def visit(tree)
        "----#{$/}"
      end
    end

    # Renders the node with HtmlFormat and strips the line breaks inside it.
    class DescNodeFormatter < self
      def visit(tree)
        desc_list = HtmlFormat.format(tree)
        remove_trailing_newlines_in_html_element(desc_list)
      end
    end

    # Renders a verbatim block, fenced when the :gfm_style option is set and
    # indented otherwise.
    class VerbatimNodeFormatter < self
      # Markdown only treats lines indented by four spaces (or a tab) as a
      # code block, so a narrower indent would yield an ordinary paragraph.
      CODE_BLOCK_INDENT = " " * 4

      def visit(tree)
        element = super(tree)
        return gfm_verbatim(element) if @options.gfm_style
        md_verbatim(element)
      end

      # Puts +element+ between a pair of ``` fences (in place).
      def gfm_verbatim(element)
        element.tap do |lines|
          lines.unshift "```#{$/}"
          lines.push "```#{$/ * 2}"
        end
      end

      # Indents every line, drops the indent of an empty last line and
      # appends a line separator.
      def md_verbatim(element)
        indented = element.join.gsub(/^/, CODE_BLOCK_INDENT)
        indented.sub(/#{CODE_BLOCK_INDENT}\Z/, "").concat($/)
      end
    end

    # Prefixes every line with "> " and drops the prefix of an empty last line.
    class QuoteNodeFormatter < self
      def visit(tree)
        super(tree).join.gsub(/^/, "> ").sub(/> \Z/, "")
      end
    end

    # Renders a table as a GFM table when possible and as HTML otherwise.
    class TableNodeFormatter < PlainTextFormat::TableNodeFormatter
      class NotConformantStyleError < StandardError; end

      def visit(tree)
        @options.gfm_conformant = check_conformance_with_gfm_style(tree)
        super(tree)
      end

      # Returns a copy of +tree+ whose elements are duplicated with #dup.
      def deep_copy_tree(tree)
        tree.dup.clear.tap do |new_tree|
          new_tree.concat tree.map { |node| node.dup }
        end
      end

      # Returns empty strings for both expanders.
      def choose_expander_of_col_and_row
        ["", ""]
      end

      # Formats +table+ (rows of cell strings) as a GFM table, treating the
      # first row as the header. The argument itself is left unchanged.
      def format_gfm_table(table)
        return "" if table.empty?

        cell_width = calculate_cell_width(table)
        header_delimiter = cell_width.map { |width| "-" * width }
        cell_formats = cell_width.map { |width| "%-#{width}s" }
        rows = table.dup
        rows.insert(1, header_delimiter)
        rows.map do |row|
          formatted_row = row.zip(cell_formats).map do |cell, cell_format|
            cell_format % [cell]
          end
          "|#{formatted_row.join("|")}|#{$/}"
        end.join
      end

      # Renders +tree+ with HtmlFormat inside a <table> element and strips the
      # line breaks inside it.
      def format_html_table(tree)
        table = HtmlElement.create("table").tap do |element|
          element.push HtmlFormat.format(tree)
        end.to_s
        @formatter[PlainNode].remove_trailing_newlines_in_html_element(table)
      end

      # Chooses the output style:
      # * HTML unless the :gfm_style option is set,
      # * GFM when the table conforms to GFM style,
      # * GFM with a warning on STDERR when :gfm_style is :force,
      # * HTML otherwise.
      def format_table(table, tree)
        return format_html_table(tree) unless @options.gfm_style
        return format_gfm_table(table) if @options.gfm_conformant

        if @options.gfm_style == :force
          STDERR.puts "The table is not conformant to GFM style. The first row will be treated as a header row."
          return format_gfm_table(table)
        end

        format_html_table(tree)
      end

      # Returns the largest cell length of every column. Rows may differ in
      # length and an empty table yields [].
      def calculate_cell_width(table)
        table.each_with_object([]) do |row, cell_width|
          row.each_with_index do |cell, i|
            cell_width[i] = [cell_width[i] || 0, cell.length].max
          end
        end
      end

      # A table conforms to GFM style when no cell spans several rows or
      # columns, every cell of the first row is a "th" and no other cell is.
      def check_conformance_with_gfm_style(rows)
        rows.each_with_index.all? do |row, i|
          row.all? do |cell|
            cell.rowspan <= 1 && cell.colspan <= 1 &&
              (cell.cell_type == "th") == i.zero?
          end
        end
      end
    end

    # Prefixes the heading with one "#" per nominal level of its first child,
    # followed by a space unless the heading text already begins with one.
    class HeadingNodeFormatter < self
      def visit(tree)
        super(tree).tap do |element|
          heading_mark = "#" * tree.first.nominal_level
          heading_mark << " " if /^ / !~ tree.join
          element.unshift heading_mark
        end
      end
    end

    # Appends a line separator to the visited children.
    class ParagraphNodeFormatter < self
      def visit(tree)
        super(tree).tap { |element| element.push $/ }
      end
    end

    # Appends a line separator when the first visited child starts with "*".
    class ListNodeFormatter < self
      def visit(tree)
        super(tree).tap do |element|
          element.push $/ if /\A\*/ =~ element.first.join
        end
      end
    end

    # Appends a line separator when the first visited child starts with a digit.
    class EnumNodeFormatter < self
      def visit(tree)
        super(tree).tap do |element|
          element.push $/ if /\A\d/ =~ element.first.join
        end
      end
    end

    # Puts a "*" list mark in front of the visited children.
    class ListWrapNodeFormatter < self
      def visit(tree)
        super(tree).tap do |element|
          element.unshift list_mark(tree, "*")
        end
      end
    end

    # Puts a "<nominal level>." list mark in front of the visited children.
    class EnumWrapNodeFormatter < self
      def visit(tree)
        super(tree).tap do |element|
          element.unshift list_mark(tree, "#{tree.nominal_level}.")
        end
      end
    end
  end
end
@@ -15,6 +15,16 @@ module PseudoHiki
15
15
  alias to_s join
16
16
  end
17
17
 
18
# Stores the formatter table and wraps the options Hash in an OpenStruct.
# When a block is given, the Hash it returns is merged into +options+ first.
# NOTE(review): merge! modifies the Hash object that was passed as +options+.
+ def initialize(formatter={}, options = { :verbose_mode=> false })
19
+ @formatter = formatter
20
+ options_given_via_block = nil
21
+ if block_given?
22
+ options_given_via_block = yield
23
+ options.merge!(options_given_via_block)
24
+ end
25
+ @options = OpenStruct.new(options)
26
+ end
27
+
18
28
  def create_self_element(tree=nil)
19
29
  Node.new
20
30
  end
@@ -34,47 +44,19 @@ module PseudoHiki
34
44
  element
35
45
  end
36
46
 
37
- def initialize(formatter={}, options = { :verbose_mode=> false })
38
- @formatter = formatter
39
- options_given_via_block = nil
40
- if block_given?
41
- options_given_via_block = yield
42
- options.merge!(options_given_via_block)
43
- end
44
- @options = OpenStruct.new(options)
47
# Returns the formatter stored under PlainNode in the formatter table.
+ def get_plain
48
+ @formatter[PlainNode]
49
+ end
50
+
51
# Has +tree+ accept the formatter returned by get_plain and joins the result.
+ def format(tree)
52
+ formatter = get_plain
53
+ tree.accept(formatter).join
45
54
  end
46
55
 
47
56
  def self.create(options = { :verbose_mode => false })
48
57
  formatter = {}
49
- main = self.new(formatter, options)
50
-
51
- [
52
- PlainNode,
53
- InlineNode,
54
- EmNode,
55
- StrongNode,
56
- PluginNode,
57
- VerbatimLeaf,
58
- QuoteLeaf,
59
- TableLeaf,
60
- CommentOutLeaf,
61
- HeadingLeaf,
62
- ParagraphLeaf,
63
- HrLeaf,
64
- BlockNodeEnd,
65
- ListLeaf,
66
- EnumLeaf,
67
- DescNode,
68
- QuoteNode,
69
- HeadingNode,
70
- HrNode,
71
- ListNode,
72
- EnumNode,
73
- ListWrapNode,
74
- EnumWrapNode
75
- ].each do |node_class|
76
- formatter[node_class] = self.new(formatter, options)
77
- end
58
+ main_formatter = self.new(formatter, options)
59
+ formatter.default = main_formatter
78
60
 
79
61
  formatter[InlineLeaf] = InlineLeafFormatter.new(formatter, options)
80
62
  formatter[LinkNode] = LinkNodeFormatter.new(formatter, options)
@@ -84,16 +66,7 @@ module PseudoHiki
84
66
  formatter[TableNode] = TableNodeFormatter.new(formatter, options)
85
67
  formatter[CommentOutNode] = CommentOutNodeFormatter.new(formatter, options)
86
68
  formatter[ParagraphNode] = ParagraphNodeFormatter.new(formatter, options)
87
- main
88
- end
89
-
90
- def get_plain
91
- @formatter[PlainNode]
92
- end
93
-
94
- def format(tree)
95
- formatter = get_plain
96
- tree.accept(formatter).join
69
+ main_formatter
97
70
  end
98
71
 
99
72
  ## Definitions of subclasses of PlainTextFormat begin here.
@@ -173,8 +146,8 @@ ERROR_TEXT
173
146
 
174
147
  def visit(tree)
175
148
  table = create_self_element(tree)
176
- rows = tree.dup
177
- rows.length.times { table.push Node.new }
149
+ rows = deep_copy_tree(tree)
150
+ rows.length.times { table.push create_self_element(tree) }
178
151
  max_col = tree.map{|row| row.reduce(0) {|sum, cell| sum + cell.colspan }}.max - 1
179
152
  max_row = rows.length - 1
180
153
  cur_row = nil
@@ -193,7 +166,13 @@ ERROR_TEXT
193
166
  end
194
167
  end
195
168
  end
196
- table.map {|row| row.join("\t")+$/ }.join
169
+ format_table(table, tree)
170
+ end
171
+
172
# Returns a copy of +tree+ in which every element has been duplicated with
# #dup, so adding to or removing from a copied row leaves the original row as
# it was. The copy goes one level deep only.
+ def deep_copy_tree(tree)
173
+ tree.dup.clear.tap do |new_tree|
174
+ new_tree.concat tree.map {|node| node.dup }
175
+ end
197
176
  end
198
177
 
199
178
  def each_cell_with_index(table, max_row, max_col, initial_row=0, initial_col=0)
@@ -205,8 +184,7 @@ ERROR_TEXT
205
184
  end
206
185
 
207
186
  def fill_expand(table, initial_row, initial_col, cur_cell)
208
- row_expand, col_expand = "", ""
209
- row_expand, col_expand = "||", "==" if @options.verbose_mode
187
+ row_expand, col_expand = choose_expander_of_col_and_row
210
188
  max_row = initial_row + cur_cell.rowspan - 1
211
189
  max_col = initial_col + cur_cell.colspan - 1
212
190
  each_cell_with_index(table, max_row, max_col,
@@ -220,6 +198,14 @@ ERROR_TEXT
220
198
  end
221
199
  end
222
200
 
201
# Returns the pair that fill_expand assigns to row_expand and col_expand:
# ["||", "=="] when the verbose_mode option is set, two empty strings otherwise.
+ def choose_expander_of_col_and_row
202
+ @options.verbose_mode ? ["||", "=="] : ["", ""]
203
+ end
204
+
205
# Joins the cells of every row with a tab, ends every row with $/ and
# concatenates the rows. The +tree+ argument is not used by this implementation.
+ def format_table(table, tree)
206
+ table.map {|row| row.join("\t")+$/ }.join
207
+ end
208
+
223
209
  class CommentOutNodeFormatter < self
224
210
  def visit(tree); ""; end
225
211
  end
@@ -1,3 +1,3 @@
1
1
  module PseudoHiki
2
- VERSION = "0.0.0.6.develop"
2
+ VERSION = "0.0.0.7.develop"
3
3
  end
@@ -25,6 +25,20 @@ class TC_HtmlElement < Test::Unit::TestCase
25
25
  assert_equal('<img>'+$/, img.to_s)
26
26
  end
27
27
 
28
# Expects HtmlElement.urlencode to return the same percent-encoded string
# for each of the three byte sequences below.
+ def test_urlencode
29
+ utf_str = "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88" # "test" in katakana, UTF-8 bytes
30
+ sjis_str = "\x83\x65\x83\x58\x83\x67" # "test" in katakana, Shift_JIS bytes
31
+ euc_jp_str = "\xa5\xc6\xa5\xb9\xa5\xc8" # "test" in katakana, EUC-JP bytes
32
+ assert_equal("%E3%83%86%E3%82%B9%E3%83%88", HtmlElement.urlencode(utf_str))
33
+ assert_equal("%E3%83%86%E3%82%B9%E3%83%88", HtmlElement.urlencode(sjis_str))
34
+ assert_equal("%E3%83%86%E3%82%B9%E3%83%88", HtmlElement.urlencode(euc_jp_str))
35
+ end
36
+
37
# Expects HtmlElement.urldecode to turn the percent-encoded string back into
# the byte sequence that test_urlencode uses as its UTF-8 input.
+ def test_urldecode
38
+ urlencoded_str = "%E3%83%86%E3%82%B9%E3%83%88"
39
+ assert_equal("\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88", HtmlElement.urldecode(urlencoded_str))
40
+ end
41
+
28
42
  def test_doc_type
29
43
  html_doctype = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
30
44
  "http://www.w3.org/TR/html4/loose.dtd">'.split(/\r?\n/o).join($/)+"#{$/}"
@@ -70,4 +84,20 @@ SECTION
70
84
 
71
85
  assert_equal(html5_section, Xhtml5Element.create("section").to_s)
72
86
  end
87
+
88
# Builds html > (head > meta, body > h1 > "heading 1") and expects traverse
# to yield the root first and then every descendant, parents before their
# children, ending with the string inside h1.
+ def test_traverse
89
+ html, head, meta, body, h1 = %w(html head meta body h1).map {|tagname| HtmlElement.create(tagname) }
90
+ h1_content = "heading 1"
91
+
92
+ html.push head
93
+ head.push meta
94
+ html.push body
95
+ body.push h1
96
+ h1.push h1_content
97
+
98
+ elements = []
99
+ html.traverse {|elm| elements.push elm }
100
+
101
+ assert_equal([html, head, meta, body, h1, h1_content], elements)
102
+ end
73
103
  end
@@ -185,6 +185,19 @@ HTML
185
185
  assert_equal(html,convert_text_to_html(text))
186
186
  end
187
187
 
188
# Expects a table row that ends with "||" to be converted into a row whose
# last <td> is empty.
+ def test_table_with_empty_cell_at_the_end
189
+ row = "||cell 1||cell 2||"
190
+ html = <<HTML
191
+ <table>
192
+ <tr><td>cell 1</td><td>cell 2</td><td></td></tr>
193
+ </table>
194
+ HTML
195
+
196
+ # <tr><td>cell 1</td><td>cell 2</td><td>&#160;</td></tr>
197
+
198
+ assert_equal(html,convert_text_to_html(row))
199
+ end
200
+
188
201
  def test_hr
189
202
  text = <<TEXT
190
203
  paragraph
@@ -352,6 +365,52 @@ HTML
352
365
  assert_equal(xhtml5, Xhtml5Format.format(tree).to_s)
353
366
  end
354
367
 
368
# Passes the text below to BlockParser.parse as a single String and expects
# the formatter returned by XhtmlFormat.get_plain to render the XHTML below.
+ def test_string_as_input
369
+ text = <<TEXT
370
+ !heading1
371
+
372
+ paragraph1.
373
+ paragraph2.
374
+ ""citation1
375
+ paragraph3.
376
+ ----
377
+
378
+ *list1
379
+ *list2
380
+ TEXT
381
+
382
+ html = <<HTML
383
+ <div class="section h1">
384
+ <h1>heading1
385
+ </h1>
386
+ <p>
387
+ paragraph1.
388
+ paragraph2.
389
+ </p>
390
+ <blockquote>
391
+ <p>
392
+ citation1
393
+ </p>
394
+ </blockquote>
395
+ <p>
396
+ paragraph3.
397
+ </p>
398
+ <hr />
399
+ <ul>
400
+ <li>list1
401
+ </li>
402
+ <li>list2
403
+ </li>
404
+ </ul>
405
+ <!-- end of section h1 -->
406
+ </div>
407
+ HTML
408
+
409
+ formatter = XhtmlFormat.get_plain
410
+ tree = BlockParser.parse(text)
411
+ assert_equal(html,tree.accept(formatter).to_s)
412
+ end
413
+
355
414
  def test_xhtml_list
356
415
  text = <<TEXT
357
416
  *list1(1)
@@ -502,6 +561,8 @@ a verbatim line with [[a link]]
502
561
 
503
562
  another verbatim line
504
563
 
564
+ a verbatim line that begins with a space.
565
+
505
566
  the last verbatim line
506
567
  >>>
507
568
  TEXT
@@ -510,6 +571,9 @@ TEXT
510
571
  a verbatim line with [[a link]]
511
572
 
512
573
  another verbatim line
574
+
575
+ a verbatim line that begins with a space.
576
+
513
577
 
514
578
  the last verbatim line
515
579
  TEXT
@@ -521,6 +585,8 @@ a verbatim line with [[a link]]
521
585
 
522
586
  another verbatim line
523
587
 
588
+ a verbatim line that begins with a space.
589
+
524
590
  the last verbatim line
525
591
  </pre>
526
592
  HTML