pseudohikiparser 0.0.0.6.develop → 0.0.0.7.develop
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -14
- data/lib/htmlelement.rb +18 -3
- data/lib/pseudohiki/blockparser.rb +40 -46
- data/lib/pseudohiki/htmlformat.rb +1 -0
- data/lib/pseudohiki/inlineparser.rb +32 -34
- data/lib/pseudohiki/markdownformat.rb +369 -0
- data/lib/pseudohiki/plaintextformat.rb +38 -52
- data/lib/pseudohiki/version.rb +1 -1
- data/test/test_htmlelement.rb +30 -0
- data/test/test_htmlformat.rb +66 -0
- data/test/test_markdownformat.rb +436 -0
- metadata +5 -2
@@ -0,0 +1,369 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'pseudohiki/inlineparser'
|
4
|
+
require 'pseudohiki/blockparser'
|
5
|
+
require 'pseudohiki/htmlformat'
|
6
|
+
require 'pseudohiki/plaintextformat'
|
7
|
+
require 'htmlelement'
|
8
|
+
require 'ostruct'
|
9
|
+
|
10
|
+
module PseudoHiki
|
11
|
+
class MarkDownFormat
|
12
|
+
include InlineParser::InlineElement
|
13
|
+
include TableRowParser::InlineElement
|
14
|
+
include BlockParser::BlockElement
|
15
|
+
|
16
|
+
# Stores the shared formatter lookup table and wraps the settings in an
# OpenStruct.
#
# formatter:: table kept in @formatter and consulted by the visitor methods
# options::   Hash of settings; it is copied and never modified in place
#
# When a block is given, the Hash it returns is merged over +options+.
def initialize(formatter={}, options={ :strict_mode=> false, :gfm_style => false })
  @formatter = formatter
  # merge (not merge!) so that a Hash owned by the caller stays untouched
  options = options.merge(yield) if block_given?
  @options = OpenStruct.new(options)
end
|
25
|
+
|
26
|
+
# Returns a fresh, empty HtmlElement::Children container.
# The +tree+ argument is accepted but not consulted here.
def create_self_element(tree=nil)
  return HtmlElement::Children.new
end
|
29
|
+
|
30
|
+
# Lets +node+ accept the formatter registered for its class, falling back
# to the one registered for PlainNode when the lookup yields nothing.
def visited_result(node)
  node.accept(@formatter[node.class] || @formatter[PlainNode])
end
|
34
|
+
|
35
|
+
# Appends the visited result of every token in +tree+ to +element+,
# in order.
def push_visited_results(element, tree)
  tree.each do |token|
    element.push(visited_result(token))
  end
end
|
38
|
+
|
39
|
+
# Default visitor: creates the container for +tree+, fills it with the
# visited results of the children and returns the container.
def visit(tree)
  create_self_element(tree).tap do |element|
    push_visited_results(element, tree)
  end
end
|
44
|
+
|
45
|
+
# Returns the formatter registered for PlainNode.
def get_plain
  plain_formatter = @formatter[PlainNode]
  plain_formatter
end
|
48
|
+
|
49
|
+
# Converts +tree+ by letting it accept the PlainNode formatter and joins
# the result into a single string.
def format(tree)
  formatted = tree.accept(get_plain)
  formatted.join
end
|
53
|
+
|
54
|
+
# Builds the prefix of a list item: two spaces of indentation per nesting
# level beyond the first, followed by +mark+.
#
# A separating space is appended after +mark+ unless the joined text of
# +tree+ already starts with one. The check is anchored with \A so that
# only the very beginning of the text counts; the previous /^ / also
# matched a space at the start of any later line.
#
# +mark+ itself is never modified.
def list_mark(tree, mark)
  separator = /\A / =~ tree.join ? "" : " "
  indent = " " * (tree.nominal_level - 1) * 2
  "#{indent}#{mark}#{separator}"
end
|
59
|
+
|
60
|
+
# Surrounds the contents of +element+ with +mark+: one copy is appended,
# then one is prepended. Returns whatever +unshift+ returns.
def enclose_in(element, mark)
  element.push(mark)
  element.unshift(mark)
end
|
64
|
+
|
65
|
+
# Renders +element+ with to_s, drops every line break that does not
# directly follow a ">" and appends one record separator ($/).
def remove_trailing_newlines_in_html_element(element)
  compacted = element.to_s.gsub(/([^>])\r?\n/) { $1 }
  compacted << $/
end
|
68
|
+
|
69
|
+
# Builds a ready-to-use formatter set and returns the main formatter.
#
# Every formatter receives the same lookup table and the same +options+.
# Node classes without an entry of their own (PlainNode, InlineNode,
# PluginNode, DescLeaf, TableCellNode, QuoteLeaf, TableLeaf, ParagraphLeaf,
# BlockNodeEnd, ListLeaf, EnumLeaf, CommentOutNode, HrNode, ...) are served
# by the main formatter, which is installed as the table's default value.
def self.create(options={ :strict_mode => false })
  formatter = {}
  main_formatter = new(formatter, options)
  formatter.default = main_formatter

  [
    [InlineLeaf, InlineLeafFormatter],
    [LinkNode, LinkNodeFormatter],
    [EmNode, EmNodeFormatter],
    [StrongNode, StrongNodeFormatter],
    [DelNode, DelNodeFormatter],
    [VerbatimLeaf, VerbatimLeafFormatter],
    [CommentOutLeaf, CommentOutLeafFormatter],
    [HeadingLeaf, HeadingLeafFormatter],
    [HrLeaf, HrLeafFormatter],
    [DescNode, DescNodeFormatter],
    [VerbatimNode, VerbatimNodeFormatter],
    [QuoteNode, QuoteNodeFormatter],
    [TableNode, TableNodeFormatter],
    [HeadingNode, HeadingNodeFormatter],
    [ParagraphNode, ParagraphNodeFormatter],
    [ListNode, ListNodeFormatter],
    [EnumNode, EnumNodeFormatter],
    [ListWrapNode, ListWrapNodeFormatter],
    [EnumWrapNode, EnumWrapNodeFormatter]
  ].each do |node_class, formatter_class|
    formatter[node_class] = formatter_class.new(formatter, options)
  end

  main_formatter
end
|
109
|
+
|
110
|
+
## Definitions of subclasses of MarkDownFormat begin here.
|
111
|
+
|
112
|
+
# class PlainNodeFormatter < self; end
|
113
|
+
# class InlineNodeFormatter < self; end
|
114
|
+
|
115
|
+
# Formats plain inline text: every "_" and "*" is prefixed with a
# backslash so that it is not read as Markdown emphasis.
class InlineLeafFormatter < self
  def visit(leaf)
    leaf.map do |str|
      str.gsub(/[_*]/) {|special| "\\" + special }
    end
  end
end
|
120
|
+
|
121
|
+
# Formats a link node as "[caption](uri)", prefixed with "!" when the
# last part of the link matches ImageSuffix.
class LinkNodeFormatter < self
  # Works on a copy of +tree+ because get_caption removes the caption part
  # and the separator from it.
  #
  # When nothing is left after the caption has been removed, a warning is
  # written to STDERR and the link is emitted with an empty target. Any
  # other error raised while reading the target propagates unchanged,
  # keeping its original message and backtrace.
  def visit(tree)
    tree = tree.dup
    element = create_self_element
    caption = get_caption(tree)
    if tree.empty?
      STDERR.puts "No uri is specified for #{caption}"
      ref = nil
    else
      ref = tree.last.join
    end
    element.push "!" if ImageSuffix =~ ref
    element.push "[#{(caption||tree).join}](#{tree.join})"
    element
  end

  # Splits the caption off +tree+ (destructively): everything before the
  # first [LinkSep] entry is removed together with the separator itself and
  # returned as a list of visited results. Returns nil, leaving +tree+
  # untouched, when there is no separator.
  def get_caption(tree)
    link_sep_index = tree.find_index([LinkSep])
    return nil unless link_sep_index
    caption_part = tree.shift(link_sep_index)
    tree.shift # drop the separator
    caption_part.map {|element| visited_result(element) }
  end
end
|
145
|
+
|
146
|
+
# Formats emphasised text by enclosing the visited children in "_".
class EmNodeFormatter < self
  def visit(tree)
    element = super(tree)
    enclose_in(element, "_")
    element
  end
end
|
153
|
+
|
154
|
+
# Formats strong text by enclosing the visited children in "**".
class StrongNodeFormatter < self
  def visit(tree)
    element = super(tree)
    enclose_in(element, "**")
    element
  end
end
|
161
|
+
|
162
|
+
# Formats deleted text: the joined, whitespace-stripped children are
# wrapped in "~~". Returns a String rather than a container.
class DelNodeFormatter < self
  def visit(tree)
    struck_text = super(tree).join.strip
    "~~" + struck_text + "~~"
  end
end
|
167
|
+
|
168
|
+
# class PluginNodeFormatter < self; end
|
169
|
+
# class DescLeafFormatter < self; end
|
170
|
+
# class TableCellNodeFormatter < self; end
|
171
|
+
|
172
|
+
# Formats a verbatim line: the leaf is joined as it is, without the
# escaping the parent class applies to "_" and "*".
class VerbatimLeafFormatter < InlineLeafFormatter
  def visit(leaf)
    verbatim_text = leaf.join
    verbatim_text
  end
end
|
177
|
+
|
178
|
+
# class QuoteLeafFormatter < self; end
|
179
|
+
# class TableLeafFormatter < self; end
|
180
|
+
|
181
|
+
# Commented-out lines produce no output at all.
class CommentOutLeafFormatter < self
  def visit(tree)
    ""
  end
end
|
184
|
+
|
185
|
+
# Formats the text of a heading and terminates it with a record
# separator ($/).
class HeadingLeafFormatter < self
  def visit(tree)
    element = super(tree)
    element.push $/
    element
  end
end
|
190
|
+
# class ParagraphLeafFormatter < self; end
|
191
|
+
|
192
|
+
# A horizontal rule is always rendered as "----" followed by a record
# separator ($/); the node's content is ignored.
class HrLeafFormatter < self
  def visit(tree)
    "----" + $/
  end
end
|
197
|
+
|
198
|
+
# class BlockNodeEndFormatter < self; end
|
199
|
+
# class ListLeafFormatter < self; end
|
200
|
+
# class EnumLeafFormatter < self; end
|
201
|
+
# Description lists are rendered through HtmlFormat and then compacted
# with remove_trailing_newlines_in_html_element.
class DescNodeFormatter < self
  def visit(tree)
    remove_trailing_newlines_in_html_element(HtmlFormat.format(tree))
  end
end
|
207
|
+
|
208
|
+
# Formats a verbatim block, either as a fenced code block (when the
# gfm_style option is set) or as an indented code block.
class VerbatimNodeFormatter < self
  # Markdown only recognises an indented code block when each line is
  # indented by at least four spaces (or a tab).
  CODE_INDENT = " " * 4

  def visit(tree)
    element = super(tree)
    return gfm_verbatim(element) if @options.gfm_style
    md_verbatim(element)
  end

  # Wraps the lines in a ``` fence; the closing fence is followed by two
  # record separators. Modifies and returns +element+.
  def gfm_verbatim(element)
    element.tap do |lines|
      lines.unshift "```#{$/}"
      lines.push "```#{$/ * 2}"
    end
  end

  # Indents every line by CODE_INDENT and appends a record separator.
  # Indentation that ends up alone at the very end of the text (before an
  # optional final line break) is removed again.
  def md_verbatim(element)
    indented = element.join.gsub(/^/, CODE_INDENT)
    indented.sub(/ {4}\Z/, "").concat $/
  end
end
|
226
|
+
|
227
|
+
# Formats a block quote by prefixing every line of the joined children
# with "> ". A prefix left alone at the very end is removed again.
class QuoteNodeFormatter < self
  def visit(tree)
    quoted = super(tree).join.gsub(/^/, "> ")
    quoted.sub(/> \Z/, "")
  end
end
|
233
|
+
|
234
|
+
# Formats a table either as a GFM pipe table or, when that is not
# requested or not possible, as an HTML table.
class TableNodeFormatter < PlainTextFormat::TableNodeFormatter
  # No longer raised by this class; the constant is kept so that existing
  # references to it remain valid.
  class NotConformantStyleError < StandardError; end

  NOT_CONFORMANT_WARNING = "The table is not conformant to GFM style. The first row will be treated as a header row."

  # Records whether +tree+ can be expressed as a GFM table, then lets the
  # parent class do the traversal.
  def visit(tree)
    @options.gfm_conformant = check_conformance_with_gfm_style(tree)
    super(tree)
  end

  # Returns a copy of +tree+ whose direct children are duplicated as well.
  def deep_copy_tree(tree)
    tree.dup.clear.tap do |new_tree|
      new_tree.concat tree.map {|node| node.dup }
    end
  end

  # Both expander strings are empty for Markdown output.
  def choose_expander_of_col_and_row
    ["", ""]
  end

  # Renders +table+ (rows of cell strings) as a GFM pipe table, inserting
  # a delimiter row after the first row. +table+ itself is not modified.
  def format_gfm_table(table)
    # a delimiter cell needs at least one hyphen to be valid GFM
    cell_width = calculate_cell_width(table).map {|width| [width, 1].max }
    header_delimiter = cell_width.map {|width| "-" * width }
    cell_formats = cell_width.map {|width| "%-#{width}s" }
    rows = table.to_a.dup
    rows[1, 0] = [header_delimiter]
    rows.map do |row|
      formatted_row = row.zip(cell_formats).map do |cell, cell_format|
        cell_format % [cell]
      end
      "|#{formatted_row.join("|")}|#{$/}"
    end.join
  end

  # Renders +tree+ through HtmlFormat inside a <table> element and compacts
  # the result with remove_trailing_newlines_in_html_element.
  def format_html_table(tree)
    table = HtmlElement.create("table").tap do |element|
      element.push HtmlFormat.format(tree)
    end.to_s
    @formatter[PlainNode].remove_trailing_newlines_in_html_element(table)
  end

  # Chooses the output style:
  # * gfm_style unset -> HTML
  # * table conformant to GFM -> GFM
  # * gfm_style == :force -> GFM, after a warning on STDERR
  # * otherwise -> HTML
  def format_table(table, tree)
    return format_html_table(tree) unless @options.gfm_style
    return format_gfm_table(table) if @options.gfm_conformant
    return format_html_table(tree) unless @options.gfm_style == :force

    STDERR.puts NOT_CONFORMANT_WARNING
    format_gfm_table(table)
  end

  # Returns, per column, the length of the longest cell in +table+.
  # Rows longer than the first row extend the result instead of failing.
  def calculate_cell_width(table)
    cell_width = Array.new(table.first.length, 0)
    table.each do |row|
      row.each_with_index do |cell, i|
        cell_width[i] = [cell_width[i].to_i, cell.length].max
      end
    end
    cell_width
  end

  # True when no cell spans several rows or columns, every cell of the
  # first row is a "th" cell and no cell of any later row is.
  def check_conformance_with_gfm_style(rows)
    rows.each_with_index do |row, i|
      row.each do |cell|
        return false if cell.rowspan > 1 || cell.colspan > 1
        is_header_cell = cell.cell_type == "th"
        return false if is_header_cell != (i == 0)
      end
    end
    true
  end
end
|
312
|
+
|
313
|
+
# class CommentOutNodeFormatter < self; end
|
314
|
+
|
315
|
+
# Formats a heading: the visited children are prefixed with one "#" per
# nominal level of the first child.
class HeadingNodeFormatter < self
  def visit(tree)
    super(tree).tap do |element|
      heading_mark = "#" * tree.first.nominal_level
      # Separate mark and text with a space unless the text already begins
      # with one. \A anchors the check to the very start of the text; /^ /
      # would also match a space at the start of a later line.
      heading_mark << " " unless /\A / =~ tree.join
      element.unshift heading_mark
    end
  end
end
|
324
|
+
|
325
|
+
# Formats a paragraph and terminates it with a record separator ($/).
class ParagraphNodeFormatter < self
  def visit(tree)
    element = super(tree)
    element.push $/
    element
  end
end
|
330
|
+
|
331
|
+
# class HrNodeFormatter < self; end
|
332
|
+
|
333
|
+
# Formats an unordered list. A record separator ($/) is appended only
# when the first formatted item starts with "*", i.e. carries no
# indentation in front of its mark.
class ListNodeFormatter < self
  def visit(tree)
    element = super(tree)
    element.push $/ if /\A\*/ =~ element.first.join
    element
  end
end
|
342
|
+
|
343
|
+
# Formats an ordered list. A record separator ($/) is appended only when
# the first formatted item starts with a digit, i.e. carries no
# indentation in front of its mark.
class EnumNodeFormatter < self
  def visit(tree)
    element = super(tree)
    element.push $/ if /\A\d/ =~ element.first.join
    element
  end
end
|
352
|
+
|
353
|
+
# Formats one item of an unordered list by putting the "*" list mark in
# front of the visited children.
class ListWrapNodeFormatter < self
  def visit(tree)
    element = super(tree)
    element.unshift list_mark(tree, "*")
    element
  end
end
|
360
|
+
|
361
|
+
# Formats one item of an ordered list. The mark is built from the item's
# nominal level followed by ".".
class EnumWrapNodeFormatter < self
  def visit(tree)
    element = super(tree)
    numbered_mark = "#{tree.nominal_level}."
    element.unshift list_mark(tree, numbered_mark)
    element
  end
end
|
368
|
+
end
|
369
|
+
end
|
@@ -15,6 +15,16 @@ module PseudoHiki
|
|
15
15
|
alias to_s join
|
16
16
|
end
|
17
17
|
|
18
|
+
# Stores the shared formatter lookup table and wraps the settings in an
# OpenStruct.
#
# formatter:: table kept in @formatter
# options::   Hash of settings; it is copied and never modified in place
#
# When a block is given, the Hash it returns is merged over +options+.
def initialize(formatter={}, options = { :verbose_mode=> false })
  @formatter = formatter
  # merge (not merge!) so that a Hash owned by the caller stays untouched
  options = options.merge(yield) if block_given?
  @options = OpenStruct.new(options)
end
|
27
|
+
|
18
28
|
def create_self_element(tree=nil)
|
19
29
|
Node.new
|
20
30
|
end
|
@@ -34,47 +44,19 @@ module PseudoHiki
|
|
34
44
|
element
|
35
45
|
end
|
36
46
|
|
37
|
-
def
|
38
|
-
@formatter
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
@options = OpenStruct.new(options)
|
47
|
+
# Returns the formatter registered for PlainNode.
def get_plain
  plain_formatter = @formatter[PlainNode]
  plain_formatter
end
|
50
|
+
|
51
|
+
# Converts +tree+ by letting it accept the PlainNode formatter and joins
# the result into a single string.
def format(tree)
  formatted = tree.accept(get_plain)
  formatted.join
end
|
46
55
|
|
47
56
|
def self.create(options = { :verbose_mode => false })
|
48
57
|
formatter = {}
|
49
|
-
|
50
|
-
|
51
|
-
[
|
52
|
-
PlainNode,
|
53
|
-
InlineNode,
|
54
|
-
EmNode,
|
55
|
-
StrongNode,
|
56
|
-
PluginNode,
|
57
|
-
VerbatimLeaf,
|
58
|
-
QuoteLeaf,
|
59
|
-
TableLeaf,
|
60
|
-
CommentOutLeaf,
|
61
|
-
HeadingLeaf,
|
62
|
-
ParagraphLeaf,
|
63
|
-
HrLeaf,
|
64
|
-
BlockNodeEnd,
|
65
|
-
ListLeaf,
|
66
|
-
EnumLeaf,
|
67
|
-
DescNode,
|
68
|
-
QuoteNode,
|
69
|
-
HeadingNode,
|
70
|
-
HrNode,
|
71
|
-
ListNode,
|
72
|
-
EnumNode,
|
73
|
-
ListWrapNode,
|
74
|
-
EnumWrapNode
|
75
|
-
].each do |node_class|
|
76
|
-
formatter[node_class] = self.new(formatter, options)
|
77
|
-
end
|
58
|
+
main_formatter = self.new(formatter, options)
|
59
|
+
formatter.default = main_formatter
|
78
60
|
|
79
61
|
formatter[InlineLeaf] = InlineLeafFormatter.new(formatter, options)
|
80
62
|
formatter[LinkNode] = LinkNodeFormatter.new(formatter, options)
|
@@ -84,16 +66,7 @@ module PseudoHiki
|
|
84
66
|
formatter[TableNode] = TableNodeFormatter.new(formatter, options)
|
85
67
|
formatter[CommentOutNode] = CommentOutNodeFormatter.new(formatter, options)
|
86
68
|
formatter[ParagraphNode] = ParagraphNodeFormatter.new(formatter, options)
|
87
|
-
|
88
|
-
end
|
89
|
-
|
90
|
-
def get_plain
|
91
|
-
@formatter[PlainNode]
|
92
|
-
end
|
93
|
-
|
94
|
-
def format(tree)
|
95
|
-
formatter = get_plain
|
96
|
-
tree.accept(formatter).join
|
69
|
+
main_formatter
|
97
70
|
end
|
98
71
|
|
99
72
|
## Definitions of subclasses of PlainTextFormat begin here.
|
@@ -173,8 +146,8 @@ ERROR_TEXT
|
|
173
146
|
|
174
147
|
def visit(tree)
|
175
148
|
table = create_self_element(tree)
|
176
|
-
rows = tree
|
177
|
-
rows.length.times { table.push
|
149
|
+
rows = deep_copy_tree(tree)
|
150
|
+
rows.length.times { table.push create_self_element(tree) }
|
178
151
|
max_col = tree.map{|row| row.reduce(0) {|sum, cell| sum + cell.colspan }}.max - 1
|
179
152
|
max_row = rows.length - 1
|
180
153
|
cur_row = nil
|
@@ -193,7 +166,13 @@ ERROR_TEXT
|
|
193
166
|
end
|
194
167
|
end
|
195
168
|
end
|
196
|
-
table
|
169
|
+
format_table(table, tree)
|
170
|
+
end
|
171
|
+
|
172
|
+
# Returns a copy of +tree+ whose direct children are duplicated as well,
# so that changes to the copied rows do not reach the original tree.
def deep_copy_tree(tree)
  copied = tree.dup.clear
  copied.concat(tree.map {|node| node.dup })
  copied
end
|
198
177
|
|
199
178
|
def each_cell_with_index(table, max_row, max_col, initial_row=0, initial_col=0)
|
@@ -205,8 +184,7 @@ ERROR_TEXT
|
|
205
184
|
end
|
206
185
|
|
207
186
|
def fill_expand(table, initial_row, initial_col, cur_cell)
|
208
|
-
row_expand, col_expand =
|
209
|
-
row_expand, col_expand = "||", "==" if @options.verbose_mode
|
187
|
+
row_expand, col_expand = choose_expander_of_col_and_row
|
210
188
|
max_row = initial_row + cur_cell.rowspan - 1
|
211
189
|
max_col = initial_col + cur_cell.colspan - 1
|
212
190
|
each_cell_with_index(table, max_row, max_col,
|
@@ -220,6 +198,14 @@ ERROR_TEXT
|
|
220
198
|
end
|
221
199
|
end
|
222
200
|
|
201
|
+
# Returns the pair of expander strings: ["||", "=="] in verbose mode,
# two empty strings otherwise.
def choose_expander_of_col_and_row
  if @options.verbose_mode
    ["||", "=="]
  else
    ["", ""]
  end
end
|
204
|
+
|
205
|
+
# Renders +table+ as text: the cells of a row are joined with tabs and
# each row ends with a record separator ($/). +tree+ is not used here.
def format_table(table, tree)
  lines = table.map do |row|
    "#{row.join("\t")}#{$/}"
  end
  lines.join
end
|
208
|
+
|
223
209
|
class CommentOutNodeFormatter < self
|
224
210
|
def visit(tree); ""; end
|
225
211
|
end
|
data/lib/pseudohiki/version.rb
CHANGED
data/test/test_htmlelement.rb
CHANGED
@@ -25,6 +25,20 @@ class TC_HtmlElement < Test::Unit::TestCase
|
|
25
25
|
assert_equal('<img>'+$/, img.to_s)
|
26
26
|
end
|
27
27
|
|
28
|
+
# The same katakana word in three encodings must be url-encoded to the
# same (UTF-8 based) percent-escaped form.
def test_urlencode
  expected = "%E3%83%86%E3%82%B9%E3%83%88"
  [
    "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88", # test in utf8 katakana
    "\x83\x65\x83\x58\x83\x67", # test in sjis katakana
    "\xa5\xc6\xa5\xb9\xa5\xc8" # test in euc-jp katakana
  ].each do |encoded_str|
    assert_equal(expected, HtmlElement.urlencode(encoded_str))
  end
end
|
36
|
+
|
37
|
+
# A percent-escaped string must decode to the corresponding UTF-8 bytes.
def test_urldecode
  assert_equal("\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88",
               HtmlElement.urldecode("%E3%83%86%E3%82%B9%E3%83%88"))
end
|
41
|
+
|
28
42
|
def test_doc_type
|
29
43
|
html_doctype = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
30
44
|
"http://www.w3.org/TR/html4/loose.dtd">'.split(/\r?\n/o).join($/)+"#{$/}"
|
@@ -70,4 +84,20 @@ SECTION
|
|
70
84
|
|
71
85
|
assert_equal(html5_section, Xhtml5Element.create("section").to_s)
|
72
86
|
end
|
87
|
+
|
88
|
+
# traverse must yield the element itself, then its descendants (string
# content included) in the order asserted below.
def test_traverse
  html = HtmlElement.create("html")
  head = HtmlElement.create("head")
  meta = HtmlElement.create("meta")
  body = HtmlElement.create("body")
  h1 = HtmlElement.create("h1")
  h1_content = "heading 1"

  html.push head
  head.push meta
  html.push body
  body.push h1
  h1.push h1_content

  visited = []
  html.traverse do |elm|
    visited.push elm
  end

  assert_equal([html, head, meta, body, h1, h1_content], visited)
end
|
73
103
|
end
|
data/test/test_htmlformat.rb
CHANGED
@@ -185,6 +185,19 @@ HTML
|
|
185
185
|
assert_equal(html,convert_text_to_html(text))
|
186
186
|
end
|
187
187
|
|
188
|
+
# A row that ends with a cell separator yields an additional empty cell.
def test_table_with_empty_cell_at_the_end
  expected_html = <<HTML
<table>
<tr><td>cell 1</td><td>cell 2</td><td></td></tr>
</table>
HTML

  # <tr><td>cell 1</td><td>cell 2</td><td> </td></tr>

  assert_equal(expected_html, convert_text_to_html("||cell 1||cell 2||"))
end
|
200
|
+
|
188
201
|
def test_hr
|
189
202
|
text = <<TEXT
|
190
203
|
paragraph
|
@@ -352,6 +365,52 @@ HTML
|
|
352
365
|
assert_equal(xhtml5, Xhtml5Format.format(tree).to_s)
|
353
366
|
end
|
354
367
|
|
368
|
+
# BlockParser.parse is fed the source text as one String, and the
# resulting tree is formatted with the XhtmlFormat plain formatter.
def test_string_as_input
  source_text = <<TEXT
!heading1

paragraph1.
paragraph2.
""citation1
paragraph3.
----

*list1
*list2
TEXT

  expected_html = <<HTML
<div class="section h1">
<h1>heading1
</h1>
<p>
paragraph1.
paragraph2.
</p>
<blockquote>
<p>
citation1
</p>
</blockquote>
<p>
paragraph3.
</p>
<hr />
<ul>
<li>list1
</li>
<li>list2
</li>
</ul>
<!-- end of section h1 -->
</div>
HTML

  plain_formatter = XhtmlFormat.get_plain
  parsed_tree = BlockParser.parse(source_text)
  assert_equal(expected_html, parsed_tree.accept(plain_formatter).to_s)
end
|
413
|
+
|
355
414
|
def test_xhtml_list
|
356
415
|
text = <<TEXT
|
357
416
|
*list1(1)
|
@@ -502,6 +561,8 @@ a verbatim line with [[a link]]
|
|
502
561
|
|
503
562
|
another verbatim line
|
504
563
|
|
564
|
+
a verbatim line that begins with a space.
|
565
|
+
|
505
566
|
the last verbatim line
|
506
567
|
>>>
|
507
568
|
TEXT
|
@@ -510,6 +571,9 @@ TEXT
|
|
510
571
|
a verbatim line with [[a link]]
|
511
572
|
|
512
573
|
another verbatim line
|
574
|
+
|
575
|
+
a verbatim line that begins with a space.
|
576
|
+
|
513
577
|
|
514
578
|
the last verbatim line
|
515
579
|
TEXT
|
@@ -521,6 +585,8 @@ a verbatim line with [[a link]]
|
|
521
585
|
|
522
586
|
another verbatim line
|
523
587
|
|
588
|
+
a verbatim line that begins with a space.
|
589
|
+
|
524
590
|
the last verbatim line
|
525
591
|
</pre>
|
526
592
|
HTML
|