pseudohikiparser 0.0.0.4.develop
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/pseudohiki2html.rb +319 -0
- data/lib/htmlelement/htmltemplate.rb +169 -0
- data/lib/htmlelement.rb +172 -0
- data/lib/pseudohiki/blockparser.rb +359 -0
- data/lib/pseudohiki/htmlformat.rb +229 -0
- data/lib/pseudohiki/htmlplugin.rb +155 -0
- data/lib/pseudohiki/inlineparser.rb +169 -0
- data/lib/pseudohiki/plaintextformat.rb +235 -0
- data/lib/pseudohiki/treestack.rb +119 -0
- data/lib/pseudohiki/version.rb +3 -0
- data/lib/pseudohikiparser.rb +6 -0
- data/test/test_blockparser.rb +313 -0
- data/test/test_htmlelement.rb +73 -0
- data/test/test_htmlformat.rb +538 -0
- data/test/test_htmlplugin.rb +14 -0
- data/test/test_htmltemplate.rb +190 -0
- data/test/test_inlineparser.rb +94 -0
- data/test/test_plaintextformat.rb +205 -0
- data/test/test_treestack.rb +133 -0
- metadata +107 -0
@@ -0,0 +1,319 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
require 'optparse'
|
5
|
+
require 'erb'
|
6
|
+
require 'pseudohiki/blockparser'
|
7
|
+
require 'pseudohiki/htmlformat'
|
8
|
+
require 'pseudohiki/plaintextformat'
|
9
|
+
require 'htmlelement/htmltemplate'
|
10
|
+
require 'htmlelement'
|
11
|
+
|
12
|
+
include PseudoHiki
|
13
|
+
|
14
|
+
# Default settings of the converter. Command line switches and "//key: value"
# header lines of the input file overwrite these entries, so the hash must
# stay mutable.
OPTIONS = {
  :html_version => "html4",
  :lang         => 'en',
  :encoding     => 'utf8',
  :title        => nil,
  :css          => "default.css",
  :embed_css    => nil,
  :base         => nil,
  :template     => nil,
  :output       => nil,
  :force        => false,
  :toc          => nil
}

# Loose patterns (matched against the start of a user supplied name) and the
# canonical encoding name each of them selects.
ENCODING_REGEXP = {
  /^u/io => 'utf8',
  /^e/io => 'euc-jp',
  /^s/io => 'sjis',
  /^l[a-zA-Z]*1/io => 'latin1'
}

# Accepted values of :html_version, in the order relied upon by index lookups.
HTML_VERSIONS = %w(html4 xhtml1 html5)

# A header line starts with "//", optionally preceded by a UTF-8 byte order mark.
FILE_HEADER_PAT = /^(\xef\xbb\xbf)?\/\//

# One pattern per option name; capture group 2 holds the value written after
# "//<option>:" in the input file.
WRITTEN_OPTION_PAT = {}
OPTIONS.each_key do |option_name|
  WRITTEN_OPTION_PAT[option_name] = /^(\xef\xbb\xbf)?\/\/#{option_name}:\s*(.*)$/
end

# Level 2 or 3 heading ("!!" or "!!!") immediately followed by "[id]".
HEADING_WITH_ID_PAT = /^(!{2,3})\[([A-Za-z][0-9A-Za-z_\-.:]*)\]/o
|
41
|
+
|
42
|
+
# Object returned by PlainTextFormat.create; to_plain calls its #format
# method to render a parsed line as plain text.
PlainFormat = PlainTextFormat.create
|
43
|
+
|
44
|
+
# Assembles the final HTML (or the ERB template result) from the input lines,
# using the settings held in OPTIONS.
class InputManager
  # Template object of the selected HTML version, created on first use.
  # Within this class it only serves as a factory for elements.
  def formatter
    @formatter ||= OPTIONS.html_template.new
  end

  # Builds a table of contents from the headings that carry an id.
  # Every such heading becomes a list item (one '*' per '!' of the heading)
  # that links to the upcased id; the item list is then parsed and formatted
  # like ordinary input.
  def create_table_of_contents(lines)
    headings = lines.select {|line| HEADING_WITH_ID_PAT =~ line }
    toc_lines = headings.map do |heading|
      matched = HEADING_WITH_ID_PAT.match(heading)
      depth = matched[1].length
      anchor = matched[2].upcase
      label = to_plain(heading.sub(HEADING_WITH_ID_PAT, ''))
      "%s[[%s|#%s]]"%['*'*depth, label, anchor]
    end
    parsed_toc = BlockParser.parse(toc_lines)
    OPTIONS.formatter.format(parsed_toc)
  end

  # Wraps the table of contents and the body in a "main" section.
  # Returns nil when no table of contents was requested.
  def create_main(toc, body)
    toc_title = OPTIONS[:toc]
    return nil unless toc_title

    toc_container = formatter.create_element("section")
    toc_container["id"] = "toc"
    # An empty title means "table of contents without a heading".
    toc_container.push formatter.create_element("h2", OPTIONS[:toc]) unless OPTIONS[:toc].empty?
    toc_container.push toc

    contents_container = formatter.create_element("section")
    contents_container["id"] = "contents"
    contents_container.push body

    main_container = formatter.create_element("section")
    main_container["id"] = "main"
    main_container.push toc_container
    main_container.push contents_container
    main_container
  end

  # Returns a <style> element whose content is the text of the given css file.
  def create_style(path_to_css_file)
    style_element = formatter.create_element("style")
    style_element["type"] = "text/css"
    open(File.expand_path(path_to_css_file)) do |css_file|
      style_element.push css_file.read
    end
    style_element
  end

  # Parses the input lines and formats the resulting tree.
  def compose_body(input_lines)
    OPTIONS.formatter.format(BlockParser.parse(input_lines))
  end

  # Produces the complete output: the result of the ERB template when one is
  # given, otherwise a template object filled with the composed content.
  def compose_html(input_lines)
    # Keep every local below, used or not: the ERB template is evaluated
    # with this method's binding and may refer to any of them by name.
    css = OPTIONS[:css]
    toc = create_table_of_contents(input_lines)
    body = compose_body(input_lines)
    title = OPTIONS.title
    main = create_main(toc, body)

    if OPTIONS[:template]
      erb = ERB.new(OPTIONS.read_template_file)
      html = erb.result(binding)
    else
      html = OPTIONS.create_html_with_current_options
      embedded_css = OPTIONS[:embed_css]
      html.head.push create_style(OPTIONS[:embed_css]) if embedded_css
      html.push(main || body)
    end

    html
  end
end
|
109
|
+
|
110
|
+
# Converts a piece of marked-up text to plain text: the text is split into
# lines, parsed by BlockParser, formatted with PlainFormat and returned as a
# string without the trailing newline.
def to_plain(line)
  parsed = BlockParser.parse(line.lines.to_a)
  plain_text = PlainFormat.format(parsed).to_s
  plain_text.chomp
end
|
113
|
+
|
114
|
+
# Tells whether Ruby is running on a Windows platform.
#
# RUBY_PLATFORM is compared against the names used by Windows builds of Ruby
# (mswin, mingw, cygwin, bccwin). A bare /win/ test is not usable here: it
# matches "darwin" (macOS) and misses "mingw".
#
# Returns true on Windows and nil otherwise.
def win32?
  true if RUBY_PLATFORM =~ /mswin|mingw|cygwin|bccwin/i
end
|
117
|
+
|
118
|
+
# Tells whether an optional command line argument actually carries a value.
# A missing argument (nil) is handed back unchanged; otherwise the result is
# true for a non-empty value and false for an empty one.
def value_given?(value)
  return value unless value
  !value.empty?
end
|
121
|
+
|
122
|
+
# Singleton methods of the OPTIONS hash. They translate the raw option
# values into the objects used for conversion (template class, formatter,
# charset, base uri, output path) and update the hash from command line
# switches or from the header of the input file.
class << OPTIONS
  include HtmlElement::CHARSET
  attr_accessor :need_output_file, :default_title
  attr_reader :input_file_basename

  # Canonical encoding name => charset name defined in HtmlElement::CHARSET.
  ENCODING_TO_CHARSET = {
    'utf8' => UTF8,
    'euc-jp' => EUC_JP,
    'sjis' => SJIS,
    'latin1' => LATIN1
  }

  # HTML version name => class to use, paired by position with HTML_VERSIONS.
  HTML_TEMPLATES = Hash[*HTML_VERSIONS.zip([HtmlTemplate, XhtmlTemplate, Xhtml5Template]).flatten]
  FORMATTERS = Hash[*HTML_VERSIONS.zip([HtmlFormat, XhtmlFormat, Xhtml5Format]).flatten]

  # Template class for the selected HTML version.
  def html_template
    HTML_TEMPLATES[self[:html_version]]
  end

  # Formatter for the selected HTML version.
  def formatter
    FORMATTERS[self[:html_version]]
  end

  # Charset name for the selected encoding (nil for an unknown encoding).
  def charset
    ENCODING_TO_CHARSET[self[:encoding]]
  end

  # Value for the href attribute of the <base> element.
  # Unless the given path already ends with a slash or backslash (optionally
  # followed by dots), "/." is appended; on Windows a path that does not
  # start with "." is additionally prefixed with "file:///".
  # Returns nil when no base was given.
  def base
    base_dir = self[:base]
    if base_dir and base_dir !~ /[\/\\]\.*$/o
      base_dir = File.join(base_dir,".")
      base_dir = "file:///"+base_dir if base_dir !~ /^\./o and win32?
    end
    base_dir
  end

  # Document title: the explicit option, else the default title, else "-".
  def title
    self[:title]||@default_title||"-"
  end

  # Content of the template file named by the :template option.
  def read_template_file
    File.read(File.expand_path(self[:template]))
  end

  # Selects the HTML version. An exact name from HTML_VERSIONS is taken
  # silently. Anything else is guessed from its prefix ("x..." => xhtml1,
  # "h5..." => html5, otherwise the current value is kept) and the choice is
  # reported on STDERR.
  def set_html_version(version)
    if HTML_VERSIONS.include? version
      self[:html_version] = version
    else
      case version
      when /^x/io
        self[:html_version] = HTML_VERSIONS[1] #xhtml1
      when /^h5/io
        self[:html_version] = HTML_VERSIONS[2] #html5
      end
      STDERR.puts "\"#{version}\" is an invalid option for --html_version. \"#{self[:html_version]}\" is chosen instead."
    end
  end

  # Selects the encoding. A canonical name is taken silently; anything else
  # is matched against ENCODING_REGEXP (the current value is kept when
  # nothing matches) and the choice is reported on STDERR.
  def set_encoding(given_opt)
    if ENCODING_REGEXP.values.include? given_opt
      self[:encoding] = given_opt
    else
      ENCODING_REGEXP.each do |pat, encoding|
        self[:encoding] = encoding if pat =~ given_opt
      end
      STDERR.puts "\"#{self[:encoding]}\" is chosen as an encoding system, instead of \"#{given_opt}\"."
    end
  end

  # Reads "//option: value" lines from the top of the input. Scanning stops
  # at the first line that does not start with "//". Nothing is applied
  # while the :force option is set, so that command line options win.
  def set_options_from_input_file(input_lines)
    input_lines.each do |line|
      break if FILE_HEADER_PAT !~ line
      line = line.chomp
      self.keys.each do |opt|
        matched = WRITTEN_OPTION_PAT[opt].match(line)
        next unless matched and not self[:force]
        apply_written_option(opt, matched[2])
      end
    end
  end

  # Creates a template object of the selected HTML version and fills in
  # charset, language, css link, base uri and title from the options.
  def create_html_with_current_options
    html = self.html_template.new
    html.charset = self.charset
    html.language = self[:lang]
    html.default_css = self[:css] if self[:css]
    html.base = self.base if self[:base]
    html.title = self.title
    html
  end

  # Remembers directory, file name and extension-less base name of the
  # input file; the last expression makes the base name the return value.
  def read_input_filename(filename)
    @input_file_dir, @input_file_name = File.split(File.expand_path(filename))
    @input_file_basename = File.basename(@input_file_name,".*")
  end

  # Absolute path of the output file, or nil when the result goes to STDOUT.
  # Without an explicit :output the file is "<input basename>.html" in the
  # directory of the input file.
  def output_file_name
    return nil unless self.need_output_file
    if self[:output]
      File.expand_path(self[:output])
    else
      File.join(@input_file_dir, @input_file_basename+".html")
    end
  end

  # Stores one option value read from the input file. The HTML version and
  # the encoding go through their setters so that a loosely written value
  # (e.g. "xhtml" or "utf-8") is normalized instead of leaving a key that
  # html_template, formatter or charset cannot resolve.
  def apply_written_option(opt, value)
    case opt
    when :html_version
      set_html_version(value)
    when :encoding
      set_encoding(value)
    else
      self[opt] = value
    end
  end
  private :apply_written_option
end
|
227
|
+
|
228
|
+
# Command line interface. Every switch takes an optional argument; the
# handlers store the values in OPTIONS. Parsing removes the switches from
# ARGV, leaving only the input file names.
OptionParser.new("** Convert texts written in a Hiki-like notation into HTML **
USAGE: #{File.basename(__FILE__)} [options]") do |opt|
  opt.on("-h [html_version]", "--html_version [=html_version]",
         "HTML version to be used. Choose html4, xhtml1 or html5 (default: #{OPTIONS[:html_version]})") do |version|
    OPTIONS.set_html_version(version)
  end

  opt.on("-l [lang]", "--lang [=lang]",
         "Set the value of the lang attribute (default: #{OPTIONS[:lang]})") do |lang|
    OPTIONS[:lang] = lang if value_given?(lang)
  end

  opt.on("-e [encoding]", "--encoding [=encoding]",
         "Available options: utf8, euc-jp, sjis, latin1 (default: #{OPTIONS[:encoding]})") do |given_opt|
    OPTIONS.set_encoding(given_opt)
  end

  #use '-w' to avoid the conflict with the short option for '[-t]emplate'
  opt.on("-w [(window) title]", "--title [=title]",
         "Set the value of the <title> element (default: the basename of the input file)") do |title|
    OPTIONS[:title] = title if value_given?(title)
  end

  opt.on("-c [css]", "--css [=css]",
         "Set the path to a css file to be used (default: #{OPTIONS[:css]})") do |css|
    OPTIONS[:css] = css
  end

  opt.on("-C [path_to_css_file]", "--embed-css [=path_to_css_file]",
         "Set the path to a css file to be embedded in the output (default: not to embed)") do |path_to_css_file|
    OPTIONS[:embed_css] = path_to_css_file
  end

  opt.on("-b [base]", "--base [=base]",
         "Specify the value of href attribute of the <base> element (default: not specified)") do |base_dir|
    OPTIONS[:base] = base_dir if value_given?(base_dir)
  end

  opt.on("-t [template]", "--template [=template]",
         "Specify a template file written in eruby format with \"<%= body %>\" inside (default: not specified)") do |template|
    OPTIONS[:template] = template if value_given?(template)
  end

  # The switch alone requests an output file; its name is then derived from
  # the input file name.
  opt.on("-o [output]", "--output [=output]",
         "Output to the specified file. If no file is given, \"[input_file_basename].html\" will be used.(default: STDOUT)") do |output|
    OPTIONS[:output] = File.expand_path(output) if value_given?(output)
    OPTIONS.need_output_file = true
  end

  opt.on("-f", "--force",
         "Force to apply command line options.(default: false)") do |force|
    OPTIONS[:force] = force
  end

  opt.on("-m [contents-title]", "--table-of-contents [=contents-title]",
         "Include the list of h2 and/or h3 headings with ids.(default: nil)") do |toc_title|
    OPTIONS[:toc] = toc_title
  end

  opt.parse!
end
|
289
|
+
|
290
|
+
# When $KCODE is set (Ruby 1.8), it selects the encoding unless the command
# line options are forced.
if $KCODE
  ENCODING_REGEXP.each do |pat, encoding|
    OPTIONS[:encoding] = encoding if pat =~ $KCODE and not OPTIONS[:force]
  end
end

input_manager = InputManager.new

# The default output file name is derived from the input file name, which is
# known only when exactly one input file is given. In every other case an
# output file requested with "-o" must be named explicitly.
if ARGV.length == 1
  OPTIONS.read_input_filename(ARGV[0])
elsif OPTIONS.need_output_file and not OPTIONS[:output]
  raise "You must specify a file name for output"
end

# ARGF.readlines returns the same array of lines as ARGF.lines.to_a and is
# also available on Ruby versions that no longer provide ARGF.lines.
input_lines = ARGF.readlines

OPTIONS.set_options_from_input_file(input_lines)
OPTIONS.default_title = OPTIONS.input_file_basename

html = input_manager.compose_html(input_lines)
output_file_name = OPTIONS.output_file_name

if output_file_name
  File.open(output_file_name, "w") {|f| f.puts html }
else
  STDOUT.puts html
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'htmlelement'
|
4
|
+
|
5
|
+
# Skeleton of an HTML 4 document: an <html> element with a <head> (meta
# elements, title, optional base, default css link) and a <body> to which
# content is pushed. ELEMENT maps each template class to the element class
# it builds its elements with.
class HtmlTemplate

  META_CHARSET = "text/html; charset=%s"
  # Charset => language used by charset=; "en" for any charset not listed.
  LANGUAGE = Hash.new("en")
  LANGUAGE[HtmlElement::CHARSET::EUC_JP] = "ja"
  LANGUAGE[HtmlElement::CHARSET::SJIS] = "ja"
  ELEMENT = { self => HtmlElement }

  # charset::  charset name (default: UTF8 of the element class)
  # language:: value of the lang attribute and of Content-Language
  # css_link:: href of the default stylesheet link
  # base_uri:: href of the <base> element, or nil for no <base> element
  def initialize(charset=ELEMENT[self.class]::CHARSET::UTF8, language="en", css_link="default.css", base_uri=nil)
    @html = create_element("html", nil, "lang" => language)
    @head = create_element("head")
    @charset = charset
    @content_language = create_meta("Content-Language", language)
    @base = set_path_to_base(base_uri)
    @content_type = set_charset_in_meta(charset)
    @content_style_type = create_meta("Content-Style-Type","text/css")
    @content_script_type = create_meta("Content-Script-Type","text/javascript")
    @default_css_link = create_css_link(css_link)
    @title = nil
    @title_element = create_element("title")
    @body = create_element("body")
    @html.push @head
    @html.push @body
    [ @content_language,
      @content_type,
      @content_style_type,
      @content_script_type,
      @title_element,
      @base,
      @default_css_link
    ].each do |element|
      @head.push element
    end
  end
  attr_reader :title, :head

  # Creates an element with the element class that belongs to this template.
  def create_element(*params)
    ELEMENT[self.class].create(*params)
  end

  # Sets the charset and updates the Content-Type meta element. The
  # Content-Language meta element is reset to the language registered for
  # the charset in LANGUAGE.
  def charset=(charset_name)
    @charset=charset_name
    @content_language["content"] = LANGUAGE[@charset]
    @content_type["content"] = META_CHARSET%[charset_name]
  end

  # Sets the Content-Language meta element and the lang attribute of <html>.
  def language=(language)
    @content_language["content"] = language
    @html["lang"] = language
  end

  # Sets the href of the <base> element, creating the element on first use.
  # @base is an empty string until a <base> element exists. The element
  # itself never has children, so its own empty? cannot tell the two states
  # apart and must not be used for this test.
  def base=(base_uri)
    if @base.is_a?(String)
      @base = create_element("base", nil, "href" => base_uri)
      @head.push @base
    else
      @base["href"] = base_uri
    end
  end

  # Appends a link to an additional stylesheet.
  def add_css_file(file_path)
    @head.push create_css_link(file_path)
  end

  # Changes the href of the default stylesheet link.
  def default_css=(file_path)
    @default_css_link["href"] = file_path
  end

  # Replaces the content of the <title> element.
  def title=(title)
    @title_element.pop until @title_element.empty?
    @title = title
    @title_element.push title
  end

  # Appends content to the <body> element.
  def push(element)
    @body.push element
  end

  def euc_jp!
    self.charset = ELEMENT[self.class]::CHARSET::EUC_JP
    self.language = "ja"
  end

  def sjis!
    self.charset = ELEMENT[self.class]::CHARSET::SJIS
    self.language = "ja"
  end

  def utf8!
    self.charset = ELEMENT[self.class]::CHARSET::UTF8
  end

  def latin1!
    self.charset = ELEMENT[self.class]::CHARSET::LATIN1
  end

  # The document: doctype of the element class followed by the <html> element.
  def to_s
    [ELEMENT[self.class].doctype(@charset),
     @html].join("")
  end

  private

  def create_meta(type,content)
    create_element("meta", nil,
                   "http-equiv" => type,
                   "content" => content)
  end

  def create_css_link(file_path)
    create_element("link", nil,
                   "rel" => "stylesheet",
                   "type" => "text/css",
                   "href" => file_path)
  end

  def set_charset_in_meta(charset)
    create_meta("Content-Type",META_CHARSET%[charset])
  end

  # Returns a <base> element, or an empty string as a placeholder when no
  # base uri is given.
  def set_path_to_base(base_uri)
    return "" unless base_uri
    create_element("base", nil, "href" => base_uri)
  end
end

# XHTML 1.0 variant: adds the xmlns attribute and keeps xml:lang in step
# with lang.
class XhtmlTemplate < HtmlTemplate
  ELEMENT[self] = XhtmlElement

  def initialize(*params)
    super(*params)
    @html['xmlns'] = 'http://www.w3.org/1999/xhtml'
    @html["xml:lang"] = @html["lang"] #language
  end

  def language=(language)
    super(language)
    @html["xml:lang"] = language
  end
end

# HTML5 (XML syntax) variant: the charset is declared with
# <meta charset="...">, and the http-equiv meta elements for language,
# style type and script type are left out of the output.
class Xhtml5Template < XhtmlTemplate
  ELEMENT[self] = Xhtml5Element

  def initialize(*params)
    super(*params)

    # The elements stay in <head> so that the setters keep working, but
    # each one gets a to_str that returns an empty string. This removes
    # them from the joined output of the head element's children.
    def @content_language.to_str
      ""
    end

    def @content_style_type.to_str
      ""
    end

    def @content_script_type.to_str
      ""
    end
  end

  def set_charset_in_meta(charset)
    create_element("meta", nil, "charset" => charset)
  end

  def charset=(charset_name)
    @charset=charset_name
    @content_type["charset"] = @charset
  end
end
|
data/lib/htmlelement.rb
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'kconv'
|
4
|
+
|
5
|
+
# A node of an HTML 4 document tree: a tag name, a hash of attributes and a
# list of children (strings or other elements). to_s renders the node with
# the format registered for its tag name in TagFormats.
class HtmlElement
  # Child list whose string form is the concatenation of all children.
  class Children < Array
    def to_s
      self.join
    end
  end

  module CHARSET
    EUC_JP = "EUC-JP"
    SJIS = "Shift_JIS"
    UTF8 = "UTF-8"
    LATIN1 = "ISO-8859-1"
  end

  DOCTYPE = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">'.split(/\r?\n/o).join($/)+"#{$/}"

  # Characters with a special meaning in HTML and their character entity
  # references.
  ESC = {
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;'
  }

  DECODE = ESC.invert
  CharEntityPat = /#{DECODE.keys.join("|")}/

  # Tag names that create replaces by <div class="tagname">.
  Html5Tags = %w(article section hgroup aside nav menu header footer figure details legend)

  ELEMENT_TYPES = {
    :BLOCK => %w(html body div table colgroup thead tbody ul ol dl head p pre blockquote style),
    :HEADING_TYPE_BLOCK => %w(dt dd tr title h1 h2 h3 h4 h5 h6),
    :LIST_ITEM_TYPE_BLOCK => %w(li col),
    :EMPTY_BLOCK => %w(img meta link base input hr)
  }

  # Format arguments are always [tagname, attributes, children, tagname];
  # formats with fewer placeholders use only the leading arguments.
  ELEMENTS_FORMAT = {
    :INLINE => "<%s%s>%s</%s>",
    :BLOCK => "<%s%s>#{$/}%s</%s>#{$/}",
    :HEADING_TYPE_BLOCK => "<%s%s>%s</%s>#{$/}",
    :LIST_ITEM_TYPE_BLOCK => "<%s%s>%s#{$/}",
    :EMPTY_BLOCK => "<%s%s>#{$/}"
  }

  attr_reader :tagname
  attr_accessor :parent, :children

  # Doctype declaration of the receiving class with the encoding filled in
  # where the declaration has a placeholder for it.
  def self.doctype(encoding="UTF-8")
    self::DOCTYPE%[encoding]
  end

  # Factory used instead of new: a tag listed in Html5Tags of the receiving
  # class is turned into a div whose class attribute is the requested tag
  # name (the given attributes hash is modified for that).
  def self.create(tagname, content=nil, attributes={})
    if self::Html5Tags.include? tagname
      attributes["class"] = tagname
      tagname = "div"
    end
    self.new(tagname, content, attributes)
  end

  def HtmlElement.comment(content)
    "<!-- #{content} -->#{$/}"
  end

  # Percent-encodes every byte of the UTF-8 form of str that is not an
  # ASCII word character, "." or "-".
  def HtmlElement.urlencode(str)
    str.toutf8.gsub(/[^\w\.\-]/n) do |ch|
      # Work on the bytes of the match: on Ruby 1.9+ ch[0] is a string (and
      # the match may be a multi-byte character), not a byte value.
      ch.unpack('C*').map {|byte| format('%%%02X', byte) }.join
    end
  end

  # Reverses urlencode; the result is converted to Shift_JIS or EUC-JP when
  # $KCODE asks for it.
  def HtmlElement.urldecode(str)
    utf = str.gsub(/%\w\w/) {|ch| [ch[-2,2]].pack('H*') }
    return utf.tosjis if $KCODE =~ /^s/io
    return utf.toeuc if $KCODE =~ /^e/io
    utf
  end

  # Builds the tag name => format table of the receiving class. Tags not
  # listed in ELEMENT_TYPES are formatted as inline elements; the empty tag
  # name renders only attributes and children.
  def self.assign_tagformats
    tagformats = Hash.new(ELEMENTS_FORMAT[:INLINE])
    self::ELEMENT_TYPES.each do |type, names|
      names.each {|name| tagformats[name] = self::ELEMENTS_FORMAT[type] }
    end
    tagformats[""] = "%s%s%s"
    tagformats
  end

  # Replaces &, ", < and > by their character entity references.
  def HtmlElement.escape(str)
    str.gsub(/[&"<>]/on) {|pat| ESC[pat] }
  end

  # Replaces the character entity references produced by escape by the
  # characters they stand for.
  def HtmlElement.decode(str)
    str.gsub(CharEntityPat) {|ent| DECODE[ent]}
  end

  TagFormats = self.assign_tagformats

  def initialize(tagname, content=nil, attributes={})
    @parent = nil
    @tagname = tagname
    @children = Children.new
    @children.push content if content
    @attributes = attributes
    @end_comment_not_added = true
  end

  def empty?
    @children.empty?
  end

  # Appends a child and returns self; an element child gets self as parent.
  def push(child)
    @children.push child
    child.parent = self if child.kind_of? HtmlElement
    self
  end

  def pop
    @children.pop
  end

  def []=(attribute, value)
    @attributes[attribute] = value
  end

  def [](attribute)
    @attributes[attribute]
  end

  # Attributes as ' name="value"' pairs with escaped values, sorted so that
  # the output does not depend on the order of the hash.
  def format_attributes
    @attributes.collect do |attr,value|
      ' %s="%s"'%[attr,HtmlElement.escape(value.to_s)]
    end.sort.join
  end
  private :format_attributes

  # Appends an "end of <id or class>" comment to a div or section, once per
  # element, so that repeated calls of to_s do not add it again.
  def add_end_comment_for_div_or_section
    if (@tagname == "div" || @tagname == "section") && @end_comment_not_added
      id_or_class = self["id"]||self["class"]
      self.push HtmlElement.comment("end of #{id_or_class}") if id_or_class
      @end_comment_not_added = false
    end
  end

  def to_s
    add_end_comment_for_div_or_section
    self.class::TagFormats[@tagname]%[@tagname, format_attributes, @children, @tagname]
  end
  alias to_str to_s
end
|
150
|
+
|
151
|
+
# XHTML 1.0 Strict flavour of HtmlElement: an XML declaration precedes the
# doctype, list items get a closing tag and empty elements are self-closed.
class XhtmlElement < HtmlElement
  DOCTYPE = '<?xml version="1.0" encoding="%s"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'.split(/\r?\n/o).join($/)+"#{$/}"

  # Same formats as the superclass except for the two entries replaced here.
  ELEMENTS_FORMAT = superclass::ELEMENTS_FORMAT.merge(
    :LIST_ITEM_TYPE_BLOCK => "<%s%s>%s</%s>#{$/}",
    :EMPTY_BLOCK => "<%s%s />#{$/}"
  )

  TagFormats = assign_tagformats
end
|
162
|
+
|
163
|
+
# HTML5 (XML syntax) flavour: the sectioning tags that the superclass turns
# into divs are rendered as real block elements here; only "main" is still
# replaced by a div.
class Xhtml5Element < XhtmlElement
  DOCTYPE = '<?xml version="1.0" encoding="%s"?>
<!DOCTYPE html>'.split(/\r?\n/o).join($/)+"#{$/}"

  # Block elements of the superclass plus the tags in its Html5Tags list.
  ELEMENT_TYPES = superclass::ELEMENT_TYPES.merge(
    :BLOCK => superclass::ELEMENT_TYPES[:BLOCK] + superclass::Html5Tags
  )
  Html5Tags = %w(main)

  TagFormats = assign_tagformats
end
|