pseudohikiparser 0.0.0.6.develop → 0.0.0.7.develop

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,369 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pseudohiki/inlineparser'
4
+ require 'pseudohiki/blockparser'
5
+ require 'pseudohiki/htmlformat'
6
+ require 'pseudohiki/plaintextformat'
7
+ require 'htmlelement'
8
+ require 'ostruct'
9
+
10
+ module PseudoHiki
11
+ class MarkDownFormat
12
+ include InlineParser::InlineElement
13
+ include TableRowParser::InlineElement
14
+ include BlockParser::BlockElement
15
+
16
+ def initialize(formatter={}, options={ :strict_mode=> false, :gfm_style => false })
17
+ @formatter = formatter
18
+ options_given_via_block = nil
19
+ if block_given?
20
+ options_given_via_block = yield
21
+ options.merge!(options_given_via_block)
22
+ end
23
+ @options = OpenStruct.new(options)
24
+ end
25
+
26
+ def create_self_element(tree=nil)
27
+ HtmlElement::Children.new
28
+ end
29
+
30
+ def visited_result(node)
31
+ visitor = @formatter[node.class]||@formatter[PlainNode]
32
+ node.accept(visitor)
33
+ end
34
+
35
+ def push_visited_results(element, tree)
36
+ tree.each {|token| element.push visited_result(token) }
37
+ end
38
+
39
+ def visit(tree)
40
+ element = create_self_element(tree)
41
+ push_visited_results(element, tree)
42
+ element
43
+ end
44
+
45
+ def get_plain
46
+ @formatter[PlainNode]
47
+ end
48
+
49
+ def format(tree)
50
+ formatter = get_plain
51
+ tree.accept(formatter).join
52
+ end
53
+
54
+ def list_mark(tree, mark)
55
+ mark = mark.dup
56
+ mark << " " if /^ /o !~ tree.join
57
+ " " * (tree.nominal_level - 1) * 2 + mark
58
+ end
59
+
60
+ def enclose_in(element, mark)
61
+ element.push mark
62
+ element.unshift mark
63
+ end
64
+
65
+ def remove_trailing_newlines_in_html_element(element)
66
+ element.to_s.gsub(/([^>])\r?\n/, "\\1") << $/
67
+ end
68
+
69
+ def self.create(options={ :strict_mode => false })
70
+ formatter = {}
71
+ main_formatter = self.new(formatter, options)
72
+ formatter.default = main_formatter
73
+
74
+ # formatter[PlainNode] = PlainNodeFormatter.new(formatter, options)
75
+ # formatter[InlineNode] = InlineNodeFormatter.new(formatter, options)
76
+ formatter[InlineLeaf] = InlineLeafFormatter.new(formatter, options)
77
+ formatter[LinkNode] = LinkNodeFormatter.new(formatter, options)
78
+ formatter[EmNode] = EmNodeFormatter.new(formatter, options)
79
+ formatter[StrongNode] = StrongNodeFormatter.new(formatter, options)
80
+ formatter[DelNode] = DelNodeFormatter.new(formatter, options)
81
+ # formatter[PluginNode] = PluginNodeFormatter.new(formatter, options)
82
+ # formatter[DescLeaf] = DescLeafFormatter.new(formatter, options)
83
+ # formatter[TableCellNode] = TableCellNodeFormatter.new(formatter, options)
84
+ formatter[VerbatimLeaf] = VerbatimLeafFormatter.new(formatter, options)
85
+ # formatter[QuoteLeaf] = QuoteLeafFormatter.new(formatter, options)
86
+ # formatter[TableLeaf] = TableLeafFormatter.new(formatter, options)
87
+ formatter[CommentOutLeaf] = CommentOutLeafFormatter.new(formatter, options)
88
+ formatter[HeadingLeaf] = HeadingLeafFormatter.new(formatter, options)
89
+ # formatter[ParagraphLeaf] = ParagraphLeafFormatter.new(formatter, options)
90
+ formatter[HrLeaf] = HrLeafFormatter.new(formatter, options)
91
+ # formatter[BlockNodeEnd] = BlockNodeEndFormatter.new(formatter, options)
92
+ # formatter[ListLeaf] = ListLeafFormatter.new(formatter, options)
93
+ # formatter[EnumLeaf] = EnumLeafFormatter.new(formatter, options)
94
+ formatter[DescNode] = DescNodeFormatter.new(formatter, options)
95
+ formatter[VerbatimNode] = VerbatimNodeFormatter.new(formatter, options)
96
+ formatter[QuoteNode] = QuoteNodeFormatter.new(formatter, options)
97
+ formatter[TableNode] = TableNodeFormatter.new(formatter, options)
98
+ # formatter[CommentOutNode] = CommentOutNodeFormatter.new(formatter, options)
99
+ formatter[HeadingNode] = HeadingNodeFormatter.new(formatter, options)
100
+ formatter[ParagraphNode] = ParagraphNodeFormatter.new(formatter, options)
101
+ # formatter[HrNode] = HrNodeFormatter.new(formatter, options)
102
+ formatter[ListNode] = ListNodeFormatter.new(formatter, options)
103
+ formatter[EnumNode] = EnumNodeFormatter.new(formatter, options)
104
+ formatter[ListWrapNode] = ListWrapNodeFormatter.new(formatter, options)
105
+ formatter[EnumWrapNode] = EnumWrapNodeFormatter.new(formatter, options)
106
+
107
+ main_formatter
108
+ end
109
+
110
+ ## Definitions of subclasses of MarkDownFormat begin here.
111
+
112
+ # class PlainNodeFormatter < self; end
113
+ # class InlineNodeFormatter < self; end
114
+
115
+ class InlineLeafFormatter < self
116
+ def visit(leaf)
117
+ leaf.map {|str| str.gsub(/([_*])/o, "\\\\\\1") }
118
+ end
119
+ end
120
+
121
+ class LinkNodeFormatter < self
122
+ def visit(tree)
123
+ tree = tree.dup
124
+ element = create_self_element
125
+ caption = get_caption(tree)
126
+ begin
127
+ ref = tree.last.join
128
+ rescue NoMethodError
129
+ raise NoMethodError unless tree.empty?
130
+ STDERR.puts "No uri is specified for #{caption}"
131
+ end
132
+ element.push "!" if ImageSuffix =~ ref
133
+ element.push "[#{(caption||tree).join}](#{tree.join})"
134
+ element
135
+ end
136
+
137
+ def get_caption(tree)
138
+ link_sep_index = tree.find_index([LinkSep])
139
+ return nil unless link_sep_index
140
+ caption_part = tree.shift(link_sep_index)
141
+ tree.shift
142
+ caption_part.map {|element| visited_result(element) }
143
+ end
144
+ end
145
+
146
+ class EmNodeFormatter < self
147
+ def visit(tree)
148
+ super(tree).tap do |element|
149
+ enclose_in(element, "_")
150
+ end
151
+ end
152
+ end
153
+
154
+ class StrongNodeFormatter < self
155
+ def visit(tree)
156
+ super(tree).tap do |element|
157
+ enclose_in(element, "**")
158
+ end
159
+ end
160
+ end
161
+
162
+ class DelNodeFormatter < self
163
+ def visit(tree)
164
+ "~~#{super(tree).join.strip}~~"
165
+ end
166
+ end
167
+
168
+ # class PluginNodeFormatter < self; end
169
+ # class DescLeafFormatter < self; end
170
+ # class TableCellNodeFormatter < self; end
171
+
172
+ class VerbatimLeafFormatter < InlineLeafFormatter
173
+ def visit(leaf)
174
+ leaf.join
175
+ end
176
+ end
177
+
178
+ # class QuoteLeafFormatter < self; end
179
+ # class TableLeafFormatter < self; end
180
+
181
+ class CommentOutLeafFormatter < self
182
+ def visit(tree); ""; end
183
+ end
184
+
185
+ class HeadingLeafFormatter < self
186
+ def visit(tree)
187
+ super(tree).tap {|element| element.push $/ }
188
+ end
189
+ end
190
+ # class ParagraphLeafFormatter < self; end
191
+
192
+ class HrLeafFormatter < self
193
+ def visit(tree)
194
+ "----#{$/}"
195
+ end
196
+ end
197
+
198
+ # class BlockNodeEndFormatter < self; end
199
+ # class ListLeafFormatter < self; end
200
+ # class EnumLeafFormatter < self; end
201
+ class DescNodeFormatter < self
202
+ def visit(tree)
203
+ desc_list = HtmlFormat.format(tree)
204
+ remove_trailing_newlines_in_html_element(desc_list)
205
+ end
206
+ end
207
+
208
+ class VerbatimNodeFormatter < self
209
+ def visit(tree)
210
+ element = super(tree)
211
+ return gfm_verbatim(element) if @options.gfm_style
212
+ md_verbatim(element)
213
+ end
214
+
215
+ def gfm_verbatim(element)
216
+ element.tap do |lines|
217
+ lines.unshift "```#{$/}"
218
+ lines.push "```#{$/ * 2}"
219
+ end
220
+ end
221
+
222
+ def md_verbatim(element)
223
+ element.join.gsub(/^/o, " ").sub(/ \Z/o, "").concat $/
224
+ end
225
+ end
226
+
227
+ class QuoteNodeFormatter < self
228
+ def visit(tree)
229
+ element = super(tree)
230
+ element.join.gsub(/^/o, "> ").sub(/> \Z/o, "")
231
+ end
232
+ end
233
+
234
+ class TableNodeFormatter < PlainTextFormat::TableNodeFormatter
235
+ class NotConformantStyleError < StandardError; end
236
+
237
+ def visit(tree)
238
+ @options.gfm_conformant = check_conformance_with_gfm_style(tree)
239
+ super(tree)
240
+ end
241
+
242
+ def deep_copy_tree(tree)
243
+ tree.dup.clear.tap do |new_tree|
244
+ new_tree.concat tree.map {|node| node.dup }
245
+ end
246
+ end
247
+
248
+ def choose_expander_of_col_and_row
249
+ ["", ""]
250
+ end
251
+
252
+ def format_gfm_table(table)
253
+ cell_width = calculate_cell_width(table)
254
+ header_delimiter = cell_width.map {|width| "-" * width }
255
+ cell_formats = cell_width.map {|width| "%-#{width}s" }
256
+ table[1,0] = [header_delimiter]
257
+ table.map do |row|
258
+ formatted_row = row.zip(cell_formats).map do |cell, format|
259
+ format%[cell]
260
+ end
261
+ "|#{formatted_row.join("|") }|#{$/}"
262
+ end.join
263
+ end
264
+
265
+ def format_html_table(tree)
266
+ table = HtmlElement.create("table").tap do |element|
267
+ element.push HtmlFormat.format(tree)
268
+ end.to_s
269
+ @formatter[PlainNode].remove_trailing_newlines_in_html_element(table)
270
+ end
271
+
272
+ def format_table(table, tree)
273
+ return format_html_table(tree) unless @options.gfm_style
274
+ return format_gfm_table(table) if @options.gfm_conformant
275
+
276
+ if @options.gfm_style == :force
277
+ begin
278
+ raise NotConformantStyleError.new("The table is not conformant to GFM style. The first row will be treated as a header row.")
279
+ rescue
280
+ STDERR.puts "The table is not conformant to GFM style. The first row will be treated as a header row."
281
+ end
282
+ return format_gfm_table(table)
283
+ end
284
+
285
+ format_html_table(tree)
286
+ end
287
+
288
+ def calculate_cell_width(table)
289
+ cell_width = Array.new(table.first.length, 0)
290
+ table.each do |row|
291
+ row.each_with_index do |cell, i|
292
+ cell_width[i] = cell.length if cell_width[i] < cell.length
293
+ end
294
+ end
295
+ cell_width
296
+ end
297
+
298
+ def check_conformance_with_gfm_style(rows)
299
+ rows.each_with_index do |row, i|
300
+ row.each do |cell|
301
+ return false if cell.rowspan > 1 or cell.colspan > 1
302
+ if i == 0
303
+ return false unless cell.cell_type == "th"
304
+ else
305
+ return false if cell.cell_type == "th"
306
+ end
307
+ end
308
+ end
309
+ true
310
+ end
311
+ end
312
+
313
+ # class CommentOutNodeFormatter < self; end
314
+
315
+ class HeadingNodeFormatter < self
316
+ def visit(tree)
317
+ super(tree).tap do |element|
318
+ heading_mark = "#" * tree.first.nominal_level
319
+ heading_mark << " " if /^ /o !~ tree.join
320
+ element.unshift heading_mark
321
+ end
322
+ end
323
+ end
324
+
325
+ class ParagraphNodeFormatter < self
326
+ def visit(tree)
327
+ super(tree).tap {|element| element.push $/ }
328
+ end
329
+ end
330
+
331
+ # class HrNodeFormatter < self; end
332
+
333
+ class ListNodeFormatter < self
334
+ def visit(tree)
335
+ super(tree).tap do |element|
336
+ if /\A\*/o =~ element.first.join
337
+ element.push $/
338
+ end
339
+ end
340
+ end
341
+ end
342
+
343
+ class EnumNodeFormatter < self
344
+ def visit(tree)
345
+ super(tree).tap do |element|
346
+ if /\A\d/o =~ element.first.join
347
+ element.push $/
348
+ end
349
+ end
350
+ end
351
+ end
352
+
353
+ class ListWrapNodeFormatter < self
354
+ def visit(tree)
355
+ super(tree).tap do |element|
356
+ element.unshift list_mark(tree, "*")
357
+ end
358
+ end
359
+ end
360
+
361
+ class EnumWrapNodeFormatter < self
362
+ def visit(tree)
363
+ super(tree).tap do |element|
364
+ element.unshift list_mark(tree, "#{tree.nominal_level}.")
365
+ end
366
+ end
367
+ end
368
+ end
369
+ end
@@ -15,6 +15,16 @@ module PseudoHiki
15
15
  alias to_s join
16
16
  end
17
17
 
18
+ def initialize(formatter={}, options = { :verbose_mode=> false })
19
+ @formatter = formatter
20
+ options_given_via_block = nil
21
+ if block_given?
22
+ options_given_via_block = yield
23
+ options.merge!(options_given_via_block)
24
+ end
25
+ @options = OpenStruct.new(options)
26
+ end
27
+
18
28
  def create_self_element(tree=nil)
19
29
  Node.new
20
30
  end
@@ -34,47 +44,19 @@ module PseudoHiki
34
44
  element
35
45
  end
36
46
 
37
- def initialize(formatter={}, options = { :verbose_mode=> false })
38
- @formatter = formatter
39
- options_given_via_block = nil
40
- if block_given?
41
- options_given_via_block = yield
42
- options.merge!(options_given_via_block)
43
- end
44
- @options = OpenStruct.new(options)
47
+ def get_plain
48
+ @formatter[PlainNode]
49
+ end
50
+
51
+ def format(tree)
52
+ formatter = get_plain
53
+ tree.accept(formatter).join
45
54
  end
46
55
 
47
56
  def self.create(options = { :verbose_mode => false })
48
57
  formatter = {}
49
- main = self.new(formatter, options)
50
-
51
- [
52
- PlainNode,
53
- InlineNode,
54
- EmNode,
55
- StrongNode,
56
- PluginNode,
57
- VerbatimLeaf,
58
- QuoteLeaf,
59
- TableLeaf,
60
- CommentOutLeaf,
61
- HeadingLeaf,
62
- ParagraphLeaf,
63
- HrLeaf,
64
- BlockNodeEnd,
65
- ListLeaf,
66
- EnumLeaf,
67
- DescNode,
68
- QuoteNode,
69
- HeadingNode,
70
- HrNode,
71
- ListNode,
72
- EnumNode,
73
- ListWrapNode,
74
- EnumWrapNode
75
- ].each do |node_class|
76
- formatter[node_class] = self.new(formatter, options)
77
- end
58
+ main_formatter = self.new(formatter, options)
59
+ formatter.default = main_formatter
78
60
 
79
61
  formatter[InlineLeaf] = InlineLeafFormatter.new(formatter, options)
80
62
  formatter[LinkNode] = LinkNodeFormatter.new(formatter, options)
@@ -84,16 +66,7 @@ module PseudoHiki
84
66
  formatter[TableNode] = TableNodeFormatter.new(formatter, options)
85
67
  formatter[CommentOutNode] = CommentOutNodeFormatter.new(formatter, options)
86
68
  formatter[ParagraphNode] = ParagraphNodeFormatter.new(formatter, options)
87
- main
88
- end
89
-
90
- def get_plain
91
- @formatter[PlainNode]
92
- end
93
-
94
- def format(tree)
95
- formatter = get_plain
96
- tree.accept(formatter).join
69
+ main_formatter
97
70
  end
98
71
 
99
72
  ## Definitions of subclasses of PlainTextFormat begin here.
@@ -173,8 +146,8 @@ ERROR_TEXT
173
146
 
174
147
  def visit(tree)
175
148
  table = create_self_element(tree)
176
- rows = tree.dup
177
- rows.length.times { table.push Node.new }
149
+ rows = deep_copy_tree(tree)
150
+ rows.length.times { table.push create_self_element(tree) }
178
151
  max_col = tree.map{|row| row.reduce(0) {|sum, cell| sum + cell.colspan }}.max - 1
179
152
  max_row = rows.length - 1
180
153
  cur_row = nil
@@ -193,7 +166,13 @@ ERROR_TEXT
193
166
  end
194
167
  end
195
168
  end
196
- table.map {|row| row.join("\t")+$/ }.join
169
+ format_table(table, tree)
170
+ end
171
+
172
+ def deep_copy_tree(tree)
173
+ tree.dup.clear.tap do |new_tree|
174
+ new_tree.concat tree.map {|node| node.dup }
175
+ end
197
176
  end
198
177
 
199
178
  def each_cell_with_index(table, max_row, max_col, initial_row=0, initial_col=0)
@@ -205,8 +184,7 @@ ERROR_TEXT
205
184
  end
206
185
 
207
186
  def fill_expand(table, initial_row, initial_col, cur_cell)
208
- row_expand, col_expand = "", ""
209
- row_expand, col_expand = "||", "==" if @options.verbose_mode
187
+ row_expand, col_expand = choose_expander_of_col_and_row
210
188
  max_row = initial_row + cur_cell.rowspan - 1
211
189
  max_col = initial_col + cur_cell.colspan - 1
212
190
  each_cell_with_index(table, max_row, max_col,
@@ -220,6 +198,14 @@ ERROR_TEXT
220
198
  end
221
199
  end
222
200
 
201
+ def choose_expander_of_col_and_row
202
+ @options.verbose_mode ? ["||", "=="] : ["", ""]
203
+ end
204
+
205
+ def format_table(table, tree)
206
+ table.map {|row| row.join("\t")+$/ }.join
207
+ end
208
+
223
209
  class CommentOutNodeFormatter < self
224
210
  def visit(tree); ""; end
225
211
  end
@@ -1,3 +1,3 @@
1
1
  module PseudoHiki
2
- VERSION = "0.0.0.6.develop"
2
+ VERSION = "0.0.0.7.develop"
3
3
  end
@@ -25,6 +25,20 @@ class TC_HtmlElement < Test::Unit::TestCase
25
25
  assert_equal('<img>'+$/, img.to_s)
26
26
  end
27
27
 
28
+ def test_urlencode
29
+ utf_str = "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88" # test in utf8 katakana
30
+ sjis_str = "\x83\x65\x83\x58\x83\x67" # test in sjis katakana
31
+ euc_jp_str = "\xa5\xc6\xa5\xb9\xa5\xc8" # test in euc-jp katakana
32
+ assert_equal("%E3%83%86%E3%82%B9%E3%83%88", HtmlElement.urlencode(utf_str))
33
+ assert_equal("%E3%83%86%E3%82%B9%E3%83%88", HtmlElement.urlencode(sjis_str))
34
+ assert_equal("%E3%83%86%E3%82%B9%E3%83%88", HtmlElement.urlencode(euc_jp_str))
35
+ end
36
+
37
+ def test_urldecode
38
+ urlencoded_str = "%E3%83%86%E3%82%B9%E3%83%88"
39
+ assert_equal("\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88", HtmlElement.urldecode(urlencoded_str))
40
+ end
41
+
28
42
  def test_doc_type
29
43
  html_doctype = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
30
44
  "http://www.w3.org/TR/html4/loose.dtd">'.split(/\r?\n/o).join($/)+"#{$/}"
@@ -70,4 +84,20 @@ SECTION
70
84
 
71
85
  assert_equal(html5_section, Xhtml5Element.create("section").to_s)
72
86
  end
87
+
88
+ def test_traverse
89
+ html, head, meta, body, h1 = %w(html head meta body h1).map {|tagname| HtmlElement.create(tagname) }
90
+ h1_content = "heading 1"
91
+
92
+ html.push head
93
+ head.push meta
94
+ html.push body
95
+ body.push h1
96
+ h1.push h1_content
97
+
98
+ elements = []
99
+ html.traverse {|elm| elements.push elm }
100
+
101
+ assert_equal([html, head, meta, body, h1, h1_content], elements)
102
+ end
73
103
  end
@@ -185,6 +185,19 @@ HTML
185
185
  assert_equal(html,convert_text_to_html(text))
186
186
  end
187
187
 
188
+ def test_table_with_empty_cell_at_the_end
189
+ row = "||cell 1||cell 2||"
190
+ html = <<HTML
191
+ <table>
192
+ <tr><td>cell 1</td><td>cell 2</td><td></td></tr>
193
+ </table>
194
+ HTML
195
+
196
+ # <tr><td>cell 1</td><td>cell 2</td><td>&#160;</td></tr>
197
+
198
+ assert_equal(html,convert_text_to_html(row))
199
+ end
200
+
188
201
  def test_hr
189
202
  text = <<TEXT
190
203
  paragraph
@@ -352,6 +365,52 @@ HTML
352
365
  assert_equal(xhtml5, Xhtml5Format.format(tree).to_s)
353
366
  end
354
367
 
368
+ def test_string_as_input
369
+ text = <<TEXT
370
+ !heading1
371
+
372
+ paragraph1.
373
+ paragraph2.
374
+ ""citation1
375
+ paragraph3.
376
+ ----
377
+
378
+ *list1
379
+ *list2
380
+ TEXT
381
+
382
+ html = <<HTML
383
+ <div class="section h1">
384
+ <h1>heading1
385
+ </h1>
386
+ <p>
387
+ paragraph1.
388
+ paragraph2.
389
+ </p>
390
+ <blockquote>
391
+ <p>
392
+ citation1
393
+ </p>
394
+ </blockquote>
395
+ <p>
396
+ paragraph3.
397
+ </p>
398
+ <hr />
399
+ <ul>
400
+ <li>list1
401
+ </li>
402
+ <li>list2
403
+ </li>
404
+ </ul>
405
+ <!-- end of section h1 -->
406
+ </div>
407
+ HTML
408
+
409
+ formatter = XhtmlFormat.get_plain
410
+ tree = BlockParser.parse(text)
411
+ assert_equal(html,tree.accept(formatter).to_s)
412
+ end
413
+
355
414
  def test_xhtml_list
356
415
  text = <<TEXT
357
416
  *list1(1)
@@ -502,6 +561,8 @@ a verbatim line with [[a link]]
502
561
 
503
562
  another verbatim line
504
563
 
564
+ a verbatim line that begins with a space.
565
+
505
566
  the last verbatim line
506
567
  >>>
507
568
  TEXT
@@ -510,6 +571,9 @@ TEXT
510
571
  a verbatim line with [[a link]]
511
572
 
512
573
  another verbatim line
574
+
575
+ a verbatim line that begins with a space.
576
+
513
577
 
514
578
  the last verbatim line
515
579
  TEXT
@@ -521,6 +585,8 @@ a verbatim line with [[a link]]
521
585
 
522
586
  another verbatim line
523
587
 
588
+ a verbatim line that begins with a space.
589
+
524
590
  the last verbatim line
525
591
  </pre>
526
592
  HTML