pseudohikiparser 0.0.0.4.develop
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/pseudohiki2html.rb +319 -0
- data/lib/htmlelement/htmltemplate.rb +169 -0
- data/lib/htmlelement.rb +172 -0
- data/lib/pseudohiki/blockparser.rb +359 -0
- data/lib/pseudohiki/htmlformat.rb +229 -0
- data/lib/pseudohiki/htmlplugin.rb +155 -0
- data/lib/pseudohiki/inlineparser.rb +169 -0
- data/lib/pseudohiki/plaintextformat.rb +235 -0
- data/lib/pseudohiki/treestack.rb +119 -0
- data/lib/pseudohiki/version.rb +3 -0
- data/lib/pseudohikiparser.rb +6 -0
- data/test/test_blockparser.rb +313 -0
- data/test/test_htmlelement.rb +73 -0
- data/test/test_htmlformat.rb +538 -0
- data/test/test_htmlplugin.rb +14 -0
- data/test/test_htmltemplate.rb +190 -0
- data/test/test_inlineparser.rb +94 -0
- data/test/test_plaintextformat.rb +205 -0
- data/test/test_treestack.rb +133 -0
- metadata +107 -0
@@ -0,0 +1,319 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
require 'optparse'
|
5
|
+
require 'erb'
|
6
|
+
require 'pseudohiki/blockparser'
|
7
|
+
require 'pseudohiki/htmlformat'
|
8
|
+
require 'pseudohiki/plaintextformat'
|
9
|
+
require 'htmlelement/htmltemplate'
|
10
|
+
require 'htmlelement'
|
11
|
+
|
12
|
+
include PseudoHiki
|
13
|
+
|
14
|
+
# Default values of every option the converter understands.
# Command line switches and "//option: value" header lines of the input
# file overwrite these entries in place.
OPTIONS = {
  :html_version => 'html4',
  :lang => 'en',
  :encoding => 'utf8',
  :title => nil,
  :css => 'default.css',
  :embed_css => nil,
  :base => nil,
  :template => nil,
  :output => nil,
  :force => false,
  :toc => nil
}

# Loose patterns for a user-supplied encoding name, mapped to the
# canonical name used as the value of OPTIONS[:encoding].
ENCODING_REGEXP = {
  /^u/io => 'utf8',
  /^e/io => 'euc-jp',
  /^s/io => 'sjis',
  /^l[a-zA-Z]*1/io => 'latin1'
}

# Accepted values of OPTIONS[:html_version]; other code indexes this
# array by position, so the order matters.
HTML_VERSIONS = ['html4', 'xhtml1', 'html5']

# A header line starts with "//", optionally preceded by a UTF-8 BOM.
FILE_HEADER_PAT = /^(\xef\xbb\xbf)?\/\//

# One pattern per option key, matching "//<key>: <value>";
# the value is captured in group 2.
WRITTEN_OPTION_PAT = OPTIONS.keys.inject({}) do |patterns, opt|
  patterns[opt] = /^(\xef\xbb\xbf)?\/\/#{opt}:\s*(.*)$/
  patterns
end

# A level 2 or 3 heading ("!!" or "!!!") immediately followed by "[id]".
# Group 1 holds the heading marks, group 2 the id.
HEADING_WITH_ID_PAT = /^(!{2,3})\[([A-Za-z][0-9A-Za-z_\-.:]*)\]/o
|
41
|
+
|
42
|
+
PlainFormat = PlainTextFormat.create
|
43
|
+
|
44
|
+
# Turns the lines of a PseudoHiki document into a complete HTML page,
# driven by the global OPTIONS hash.
class InputManager
  # Template object of the configured HTML version, created on first use
  # and used here only as a factory for HTML elements.
  def formatter
    @formatter ||= OPTIONS.html_template.new
  end

  # Builds the table of contents from headings that carry an explicit id.
  # Each such heading becomes a list item linking to "#ID" (id upcased),
  # nested by heading depth; the list is rendered by the current formatter.
  def create_table_of_contents(lines)
    toc_lines = []
    lines.each do |line|
      heading = HEADING_WITH_ID_PAT.match(line)
      next unless heading
      list_marks = '*' * heading[1].length
      label = to_plain(line.sub(HEADING_WITH_ID_PAT, ''))
      toc_lines.push("%s[[%s|#%s]]" % [list_marks, label, heading[2].upcase])
    end
    OPTIONS.formatter.format(BlockParser.parse(toc_lines))
  end

  # Wraps +toc+ and +body+ in a "main" section that contains a "toc"
  # section and a "contents" section.
  # Returns nil when no table of contents was requested (OPTIONS[:toc] unset).
  def create_main(toc, body)
    toc_title = OPTIONS[:toc]
    return nil unless toc_title

    toc_section = formatter.create_element("section")
    toc_section["id"] = "toc"
    # An empty title means: table of contents without its own heading.
    toc_section.push(formatter.create_element("h2", toc_title)) unless toc_title.empty?
    toc_section.push(toc)

    contents_section = formatter.create_element("section")
    contents_section["id"] = "contents"
    contents_section.push(body)

    main_section = formatter.create_element("section")
    main_section["id"] = "main"
    main_section.push(toc_section)
    main_section.push(contents_section)
    main_section
  end

  # Returns a <style> element whose content is the text of the given css file.
  def create_style(path_to_css_file)
    style_element = formatter.create_element("style")
    style_element["type"] = "text/css"
    style_element.push(File.read(File.expand_path(path_to_css_file)))
    style_element
  end

  # Parses the input and renders it with the formatter of the current HTML version.
  def compose_body(input_lines)
    OPTIONS.formatter.format(BlockParser.parse(input_lines))
  end

  # Returns the whole page: either the result of the ERB template given by
  # OPTIONS[:template], or a template object filled with the current options.
  def compose_html(input_lines)
    # NOTE: the locals below are part of the template interface. They are
    # handed to the ERB template through `binding`, so they must keep
    # their names even when they look unused here.
    css = OPTIONS[:css]
    toc = create_table_of_contents(input_lines)
    body = compose_body(input_lines)
    title = OPTIONS.title
    main = create_main(toc, body)

    if OPTIONS[:template]
      erb = ERB.new(OPTIONS.read_template_file)
      html = erb.result(binding)
    else
      html = OPTIONS.create_html_with_current_options
      embedded_css = OPTIONS[:embed_css]
      html.head.push(create_style(embedded_css)) if embedded_css
      html.push(main || body)
    end

    html
  end
end
|
109
|
+
|
110
|
+
# Renders a piece of PseudoHiki markup as plain text, without the
# trailing line break.
def to_plain(line)
  tree = BlockParser.parse(line.lines.to_a)
  PlainFormat.format(tree).to_s.chomp
end
|
113
|
+
|
114
|
+
# Tells whether +platform+ (by default the platform this interpreter was
# built for) is a Windows flavour.
#
# The former pattern /win/i also matched "darwin", so macOS was treated
# as Windows, and it missed "mingw" builds. The alternatives below name
# the Windows platforms explicitly.
#
# Returns true for a Windows platform, nil otherwise.
def win32?(platform=RUBY_PLATFORM)
  true if platform =~ /mswin|mingw|cygwin|bccwin|wince/i
end
|
117
|
+
|
118
|
+
# Tells whether an optional command line argument was really supplied.
# A missing value (nil/false) is returned unchanged; otherwise the result
# is true unless the value is empty.
def value_given?(value)
  return value unless value
  !value.empty?
end
|
121
|
+
|
122
|
+
# Singleton methods of the OPTIONS hash: lookups derived from the current
# option values, setters that normalize user input, and bookkeeping for
# the input and output file names.
class << OPTIONS
  include HtmlElement::CHARSET
  attr_accessor :need_output_file, :default_title
  attr_reader :input_file_basename

  # Value of OPTIONS[:encoding] => charset name declared in HtmlElement::CHARSET.
  ENCODING_TO_CHARSET = {
    'utf8' => UTF8,
    'euc-jp' => EUC_JP,
    'sjis' => SJIS,
    'latin1' => LATIN1
  }

  # Both tables are keyed by the entries of HTML_VERSIONS, in the same order.
  HTML_TEMPLATES = Hash[HTML_VERSIONS.zip([HtmlTemplate, XhtmlTemplate, Xhtml5Template])]
  FORMATTERS = Hash[HTML_VERSIONS.zip([HtmlFormat, XhtmlFormat, Xhtml5Format])]

  # Template class for the selected HTML version.
  def html_template
    version = self[:html_version]
    HTML_TEMPLATES[version]
  end

  # Formatter for the selected HTML version.
  def formatter
    version = self[:html_version]
    FORMATTERS[version]
  end

  # Charset name for the selected encoding (nil for an unknown encoding name).
  def charset
    encoding = self[:encoding]
    ENCODING_TO_CHARSET[encoding]
  end

  # Value for the href attribute of the <base> element.
  # Unless the given directory already ends with a path separator
  # (optionally followed by dots), "/." is appended; on Windows a path
  # that does not start with "." is additionally prefixed with "file:///".
  def base
    base_dir = self[:base]
    return base_dir unless base_dir
    return base_dir if base_dir =~ /[\/\\]\.*$/o

    with_dot = File.join(base_dir, ".")
    return with_dot if with_dot =~ /^\./o
    win32? ? "file:///" + with_dot : with_dot
  end

  # Page title: the explicit option, else the default title, else "-".
  def title
    explicit = OPTIONS[:title]
    return explicit if explicit
    @default_title || "-"
  end

  # Content of the file named by the :template option.
  def read_template_file
    template_path = File.expand_path(self[:template])
    File.read(template_path)
  end

  # Selects the HTML version. An exact name from HTML_VERSIONS is taken
  # silently. Anything else is guessed from its prefix ("x..." => xhtml1,
  # "h5..." => html5, otherwise the current value is kept) and the choice
  # is reported on STDERR.
  def set_html_version(version)
    if HTML_VERSIONS.include?(version)
      self[:html_version] = version
      return version
    end

    if /^x/io =~ version
      self[:html_version] = HTML_VERSIONS[1] #xhtml1
    elsif /^h5/io =~ version
      self[:html_version] = HTML_VERSIONS[2] #html5
    end
    STDERR.puts "\"#{version}\" is an invalid option for --html_version. \"#{self[:html_version]}\" is chosen instead."
  end

  # Selects the encoding. A canonical name is taken silently. Anything
  # else is matched against ENCODING_REGEXP (the last matching pattern
  # wins, no match keeps the current value) and the choice is reported on
  # STDERR.
  def set_encoding(given_opt)
    if ENCODING_REGEXP.values.include?(given_opt)
      self[:encoding] = given_opt
      return given_opt
    end

    ENCODING_REGEXP.each do |pat, encoding|
      next unless pat =~ given_opt
      self[:encoding] = encoding
    end
    STDERR.puts "\"#{self[:encoding]}\" is chosen as an encoding system, instead of \"#{given_opt}\"."
  end

  # Reads "//option: value" lines from the head of the input. Scanning
  # stops at the first line that does not start with "//". Values are
  # stored as written, and nothing is stored while :force is set.
  def set_options_from_input_file(input_lines)
    input_lines.each do |line|
      break unless FILE_HEADER_PAT =~ line
      header = line.chomp
      keys.each do |opt|
        written = WRITTEN_OPTION_PAT[opt].match(header)
        next unless written
        # :force is re-read for every key because a header line may set it.
        next if self[:force]
        self[opt] = written[2]
      end
    end
  end

  # Returns a new template of the selected HTML version, filled with the
  # current option values.
  def create_html_with_current_options
    page = html_template.new
    # charset is assigned before language: the template's charset= also
    # touches the Content-Language value.
    page.charset = charset
    page.language = self[:lang]
    page.default_css = self[:css] if self[:css]
    page.base = base if self[:base]
    page.title = title
    page
  end

  # Remembers directory, file name and extension-less base name of the input file.
  def read_input_filename(filename)
    full_path = File.expand_path(filename)
    @input_file_dir, @input_file_name = File.split(full_path)
    @input_file_basename = File.basename(@input_file_name, ".*")
  end

  # Path of the file to write, or nil when the result goes to STDOUT.
  # Without an explicit :output, "<input base name>.html" next to the
  # input file is used.
  def output_file_name
    return nil unless need_output_file
    explicit = self[:output]
    return File.expand_path(explicit) if explicit
    File.join(@input_file_dir, @input_file_basename + ".html")
  end
end
|
227
|
+
|
228
|
+
# Command line interface. Every switch takes an optional argument; the
# handlers store the result in OPTIONS. Parsing consumes the switches
# from ARGV, leaving only the input file name(s).
OptionParser.new("** Convert texts written in a Hiki-like notation into HTML **
USAGE: #{File.basename(__FILE__)} [options]") do |opt|
  # NOTE: "-h" is taken by --html_version, so help is reachable via --help only.
  opt.on("-h [html_version]", "--html_version [=html_version]",
         "HTML version to be used. Choose html4, xhtml1 or html5 (default: #{OPTIONS[:html_version]})") do |version|
    OPTIONS.set_html_version(version)
  end

  opt.on("-l [lang]", "--lang [=lang]",
         "Set the value of lang attributes (default: #{OPTIONS[:lang]})") do |lang|
    OPTIONS[:lang] = lang if value_given?(lang)
  end

  opt.on("-e [encoding]", "--encoding [=encoding]",
         "Available options: utf8, euc-jp, sjis, latin1 (default: #{OPTIONS[:encoding]})") do |given_opt|
    OPTIONS.set_encoding(given_opt)
  end

  #use '-w' to avoid the conflict with the short option for '[-t]emplate'
  opt.on("-w [(window) title]", "--title [=title]",
         "Set the value of the <title> element (default: the basename of the input file)") do |title|
    OPTIONS[:title] = title if value_given?(title)
  end

  opt.on("-c [css]", "--css [=css]",
         "Set the path to a css file to be used (default: #{OPTIONS[:css]})") do |css|
    OPTIONS[:css] = css
  end

  opt.on("-C [path_to_css_file]", "--embed-css [=path_to_css_file]",
         "Set the path to a css file to be embedded (default: not to embed)") do |path_to_css_file|
    OPTIONS[:embed_css] = path_to_css_file
  end

  opt.on("-b [base]", "--base [=base]",
         "Specify the value of href attribute of the <base> element (default: not specified)") do |base_dir|
    OPTIONS[:base] = base_dir if value_given?(base_dir)
  end

  opt.on("-t [template]", "--template [=template]",
         "Specify a template file written in eruby format with \"<%= body %>\" inside (default: not specified)") do |template|
    OPTIONS[:template] = template if value_given?(template)
  end

  opt.on("-o [output]", "--output [=output]",
         "Output to the specified file. If no file is given, \"[input_file_basename].html\" will be used.(default: STDOUT)") do |output|
    OPTIONS[:output] = File.expand_path(output) if value_given?(output)
    # Even without a file name the switch itself asks for a file.
    OPTIONS.need_output_file = true
  end

  opt.on("-f", "--force",
         "Force to apply command line options.(default: false)") do |force|
    OPTIONS[:force] = force
  end

  opt.on("-m [contents-title]", "--table-of-contents [=contents-title]",
         "Include the list of h2 and/or h3 headings with ids.(default: nil)") do |toc_title|
    # A bare "-m" arrives as nil, which would leave the table of contents
    # switched off. Store an empty title instead: the table is generated
    # without a heading.
    OPTIONS[:toc] = toc_title || ""
  end

  opt.parse!
end
|
289
|
+
|
290
|
+
# Main part of the script: settle the encoding, read the input, convert
# it and write the result.

# Only meaningful on Ruby 1.8, where $KCODE holds the script encoding;
# on later versions it is nil and this block is skipped.
if $KCODE
  ENCODING_REGEXP.each do |pat, encoding|
    OPTIONS[:encoding] = encoding if pat =~ $KCODE and not OPTIONS[:force]
  end
end

input_manager = InputManager.new

case ARGV.length
when 0
  # Reading from STDIN: there is no input file name to derive the output name from.
  if OPTIONS.need_output_file and not OPTIONS[:output]
    raise "You must specify a file name for output"
  end
when 1
  OPTIONS.read_input_filename(ARGV[0])
end

# ARGF.lines was removed in Ruby 3.0; ARGF.readlines returns the same
# array of lines on every Ruby version.
input_lines = ARGF.readlines

OPTIONS.set_options_from_input_file(input_lines)
OPTIONS.default_title = OPTIONS.input_file_basename

html = input_manager.compose_html(input_lines)
output_file_name = OPTIONS.output_file_name

if output_file_name
  File.open(output_file_name, "w") {|f| f.puts html }
else
  STDOUT.puts html
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'htmlelement'
|
4
|
+
|
5
|
+
# Skeleton of an HTML 4 document: <html> with a <head> (meta elements,
# <title>, optional <base>, default stylesheet link) and an initially
# empty <body>. Subclasses register their own element class in ELEMENT.
class HtmlTemplate

  META_CHARSET = "text/html; charset=%s"
  # Content-Language used by charset=: "ja" for the Japanese charsets, "en" otherwise.
  LANGUAGE = Hash.new("en")
  LANGUAGE[HtmlElement::CHARSET::EUC_JP] = "ja"
  LANGUAGE[HtmlElement::CHARSET::SJIS] = "ja"
  # Template class => element class used to build its elements.
  ELEMENT = { self => HtmlElement }

  # charset:: charset name for the Content-Type meta element and the doctype
  # language:: value of the lang attribute and of Content-Language
  # css_link:: href of the default stylesheet link
  # base_uri:: href of the <base> element; nil for no <base> element
  def initialize(charset=ELEMENT[self.class]::CHARSET::UTF8, language="en", css_link="default.css", base_uri=nil)
    @html = create_element("html", nil, "lang" => language)
    @head = create_element("head")
    @charset = charset
    @content_language = create_meta("Content-Language", language)
    @base = set_path_to_base(base_uri)
    @content_type = set_charset_in_meta(charset)
    @content_style_type = create_meta("Content-Style-Type","text/css")
    @content_script_type = create_meta("Content-Script-Type","text/javascript")
    @default_css_link = create_css_link(css_link)
    @title = nil
    @title_element = create_element("title")
    @body = create_element("body")
    @html.push @head
    @html.push @body
    # NOTE(review): this list used to contain the misspelled
    # @content_sytle_type, which is nil, so a nil child was pushed and the
    # Content-Style-Type meta element was never rendered. The nil entry is
    # removed and the rendered output stays exactly as before. Decide
    # separately whether @content_style_type should be emitted: that would
    # change the generated HTML of every template class.
    [ @content_language,
      @content_type,
      @content_script_type,
      @title_element,
      @base,
      @default_css_link
    ].each do |element|
      @head.push element
    end
  end
  attr_reader :title, :head

  # Creates an element with the element class registered for this template class.
  def create_element(*params)
    ELEMENT[self.class].create(*params)
  end

  # Sets the charset of the Content-Type meta element.
  # Side effect: Content-Language is reset to the default language of that
  # charset (see LANGUAGE); the lang attribute of <html> is not touched.
  def charset=(charset_name)
    @charset=charset_name
    @content_language["content"] = LANGUAGE[@charset]
    @content_type["content"] = META_CHARSET%[charset_name]
  end

  # Sets Content-Language and the lang attribute of <html>.
  def language=(language)
    @content_language["content"] = language
    @html["lang"] = language
  end

  # Sets the href of the <base> element, creating the element (appended
  # to <head>) if the template has none yet.
  def base=(base_uri)
    # @base is "" until a <base> element exists. The former test
    # `@base.empty?` was also true for an existing element (it has no
    # children), so a second <base> was appended instead of updating the
    # first one.
    if @base.kind_of?(HtmlElement)
      @base["href"] = base_uri
    else
      @base = create_element("base", nil, "href" => base_uri)
      @head.push @base
    end
  end

  # Appends a link to an additional stylesheet to <head>.
  def add_css_file(file_path)
    @head.push create_css_link(file_path)
  end

  # Changes the href of the default stylesheet link.
  def default_css=(file_path)
    @default_css_link["href"] = file_path
  end

  # Replaces the content of the <title> element.
  def title=(title)
    @title_element.pop until @title_element.empty?
    @title = title
    @title_element.push title
  end

  # Appends an element (or a string) to <body>.
  def push(element)
    @body.push element
  end

  def euc_jp!
    self.charset = ELEMENT[self.class]::CHARSET::EUC_JP
    self.language = "ja"
  end

  def sjis!
    self.charset = ELEMENT[self.class]::CHARSET::SJIS
    self.language = "ja"
  end

  def utf8!
    self.charset = ELEMENT[self.class]::CHARSET::UTF8
  end

  def latin1!
    self.charset = ELEMENT[self.class]::CHARSET::LATIN1
  end

  # The whole document: doctype followed by the <html> element.
  def to_s
    [ELEMENT[self.class].doctype(@charset),
     @html].join("")
  end

  private

  def create_meta(type,content)
    create_element("meta", nil,
                   "http-equiv" => type,
                   "content" => content)
  end

  def create_css_link(file_path)
    create_element("link", nil,
                   "rel" => "stylesheet",
                   "type" => "text/css",
                   "href" => file_path)
  end

  def set_charset_in_meta(charset)
    create_meta("Content-Type",META_CHARSET%[charset])
  end

  # Returns a <base> element, or "" as a placeholder when no base uri is given.
  def set_path_to_base(base_uri)
    return "" unless base_uri
    create_element("base", nil, "href" => base_uri)
  end
end
|
130
|
+
|
131
|
+
# XHTML 1.0 flavour of HtmlTemplate: elements are built with XhtmlElement
# and <html> carries the xmlns and xml:lang attributes.
class XhtmlTemplate < HtmlTemplate
  ELEMENT[self] = XhtmlElement

  # Takes the same parameters as HtmlTemplate#initialize.
  def initialize(*params)
    super
    @html['xmlns'] = 'http://www.w3.org/1999/xhtml'
    # xml:lang starts out as a copy of the lang attribute set by the superclass.
    @html["xml:lang"] = @html["lang"]
  end

  # Keeps xml:lang in step with lang.
  def language=(language)
    super
    @html["xml:lang"] = language
  end
end
|
145
|
+
|
146
|
+
# HTML5 (XML syntax) flavour of the template: elements are built with
# Xhtml5Element and the charset is declared with <meta charset="...">.
class Xhtml5Template < XhtmlTemplate
  ELEMENT[self] = Xhtml5Element

  # Takes the same parameters as HtmlTemplate#initialize.
  def initialize(*params)
    super

    # Give the Content-Language and Content-Script-Type meta elements a
    # to_str that returns an empty string -- presumably so that they
    # vanish when the children of <head> are joined into the output.
    [@content_language, @content_script_type].each do |meta|
      def meta.to_str
        ""
      end
    end
  end

  # HTML5 style charset declaration instead of the http-equiv form.
  def set_charset_in_meta(charset)
    create_element("meta", nil, "charset" => charset)
  end

  # Updates the charset attribute of the meta element. Unlike the
  # inherited version, Content-Language is left alone.
  def charset=(charset_name)
    @content_type["charset"] = @charset = charset_name
  end
end
|
data/lib/htmlelement.rb
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'kconv'
|
4
|
+
|
5
|
+
# A node of an HTML tree that renders itself as HTML 4 markup.
# Children are strings or other elements; attributes live in a hash.
class HtmlElement
  # List of child nodes; to_s concatenates the rendered children.
  class Children < Array
    def to_s
      self.join
    end
  end

  module CHARSET
    EUC_JP = "EUC-JP"
    SJIS = "Shift_JIS"
    UTF8 = "UTF-8"
    LATIN1 = "ISO-8859-1"
  end

  DOCTYPE = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">'.split(/\r?\n/o).join($/)+"#{$/}"

  # Characters with a special meaning in HTML and their character
  # entity references. (The values had degraded into the bare characters,
  # which turned escape and decode into no-ops.)
  ESC = {
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;'
  }

  DECODE = ESC.invert
  CharEntityPat = /#{DECODE.keys.join("|")}/

  # Tag names that create turns into <div class="tagname">.
  Html5Tags = %w(article section hgroup aside nav menu header footer figure details legend)

  # Layout class of each tag; tags not listed here are formatted inline.
  ELEMENT_TYPES = {
    :BLOCK => %w(html body div table colgroup thead tbody ul ol dl head p pre blockquote style),
    :HEADING_TYPE_BLOCK => %w(dt dd tr title h1 h2 h3 h4 h5 h6),
    :LIST_ITEM_TYPE_BLOCK => %w(li col),
    :EMPTY_BLOCK => %w(img meta link base input hr)
  }

  # Format strings, filled with [tag name, attributes, children, tag name].
  ELEMENTS_FORMAT = {
    :INLINE => "<%s%s>%s</%s>",
    :BLOCK => "<%s%s>#{$/}%s</%s>#{$/}",
    :HEADING_TYPE_BLOCK => "<%s%s>%s</%s>#{$/}",
    :LIST_ITEM_TYPE_BLOCK => "<%s%s>%s#{$/}",
    :EMPTY_BLOCK => "<%s%s>#{$/}"
  }

  attr_reader :tagname
  attr_accessor :parent, :children

  # The doctype declaration of this element class, with +encoding+
  # filled in where the declaration has a placeholder for it.
  def self.doctype(encoding="UTF-8")
    self::DOCTYPE%[encoding]
  end

  # Factory to be preferred over new: a tag listed in Html5Tags is
  # replaced by a <div> whose class attribute is the requested tag name.
  # Note that the class entry is written into the given +attributes+ hash.
  def self.create(tagname, content=nil, attributes={})
    if self::Html5Tags.include? tagname
      attributes["class"] = tagname
      tagname = "div"
    end
    self.new(tagname, content, attributes)
  end

  def HtmlElement.comment(content)
    "<!-- #{content} -->#{$/}"
  end

  # Percent-encodes every byte of the UTF-8 form of +str+ that is not an
  # ASCII word character, "." or "-".
  def HtmlElement.urlencode(str)
    # On Ruby >= 1.9 the matched piece is a (possibly multi-byte) String,
    # so each of its bytes is formatted; `ch[0]` is an Integer on Ruby 1.8 only.
    str.toutf8.gsub(/[^\w\.\-]/n) do |ch|
      ch.unpack('C*').map {|byte| format('%%%02X', byte) }.join
    end
  end

  # Reverses urlencode: "%XX" sequences (two hex digits) become bytes and
  # the result is treated as UTF-8. Under a Shift_JIS or EUC $KCODE
  # (Ruby 1.8) the result is converted to that encoding.
  def HtmlElement.urldecode(str)
    # Work on bytes: splicing raw bytes into a string with a character
    # encoding can raise Encoding::CompatibilityError on Ruby >= 1.9.
    raw = str.dup
    raw.force_encoding('ASCII-8BIT') if raw.respond_to?(:force_encoding)
    utf = raw.gsub(/%[0-9A-Fa-f]{2}/) {|ch| [ch[-2,2]].pack('H*') }
    utf.force_encoding('UTF-8') if utf.respond_to?(:force_encoding)
    return utf.tosjis if $KCODE =~ /^s/io
    return utf.toeuc if $KCODE =~ /^e/io
    utf
  end

  # Builds the tag name => format string table of the receiving class from
  # its ELEMENT_TYPES and ELEMENTS_FORMAT; unlisted tags get the inline
  # format and the empty tag name renders its children only.
  def self.assign_tagformats
    tagformats = Hash.new(ELEMENTS_FORMAT[:INLINE])
    self::ELEMENT_TYPES.each do |type, names|
      names.each {|name| tagformats[name] = self::ELEMENTS_FORMAT[type] }
    end
    tagformats[""] = "%s%s%s"
    tagformats
  end

  # Replaces &, ", < and > by their character entity references.
  def HtmlElement.escape(str)
    # No /n flag: a binary regexp warns on every non-ASCII UTF-8 string
    # on Ruby >= 1.9.
    str.gsub(/[&"<>]/) {|pat| ESC[pat] }
  end

  # Replaces the character entity references listed in ESC by their characters.
  def HtmlElement.decode(str)
    str.gsub(CharEntityPat) {|ent| DECODE[ent]}
  end

  # Must come after assign_tagformats and the constants it reads.
  TagFormats = self.assign_tagformats

  def initialize(tagname, content=nil, attributes={})
    @parent = nil
    @tagname = tagname
    @children = Children.new
    @children.push content if content
    @attributes = attributes
    @end_comment_not_added = true
  end

  # True when the element has no children.
  def empty?
    @children.empty?
  end

  # Appends a child and, if it is an element, records self as its parent.
  # Returns self, so calls can be chained.
  def push(child)
    @children.push child
    child.parent = self if child.kind_of? HtmlElement
    self
  end

  # Removes and returns the last child.
  def pop
    @children.pop
  end

  def []=(attribute, value)
    @attributes[attribute] = value
  end

  def [](attribute)
    @attributes[attribute]
  end

  # Attributes as ' name="value"' pieces, sorted, with escaped values.
  def format_attributes
    @attributes.collect do |attr,value|
      ' %s="%s"'%[attr,HtmlElement.escape(value.to_s)]
    end.sort.join
  end
  private :format_attributes

  # The first time a <div>/<section> is rendered, appends a
  # "<!-- end of ... -->" comment naming its id (or else its class).
  def add_end_comment_for_div_or_section
    # Explicit grouping; `or`/`and` share one precedence level and were
    # evaluated left to right in exactly this way.
    return unless (@tagname == "div" || @tagname == "section") && @end_comment_not_added
    id_or_class = self["id"]||self["class"]
    self.push HtmlElement.comment("end of #{id_or_class}") if id_or_class
    @end_comment_not_added = false
  end

  # Renders the element with the format registered for its tag name.
  # Note that rendering may append the end comment described above.
  def to_s
    add_end_comment_for_div_or_section
    self.class::TagFormats[@tagname]%[@tagname, format_attributes, @children, @tagname]
  end
  alias to_str to_s
end
|
150
|
+
|
151
|
+
# Element that renders itself as XHTML 1.0: list items get an end tag
# and empty elements are written as "<tag />".
class XhtmlElement < HtmlElement
  # XML declaration (with a placeholder for the encoding) and doctype,
  # each line terminated by the record separator.
  DOCTYPE = [
    '<?xml version="1.0" encoding="%s"?>',
    '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',
    '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
  ].map {|line| line + $/ }.join

  # Same as the superclass formats except for the two XHTML specific layouts.
  ELEMENTS_FORMAT = superclass::ELEMENTS_FORMAT.merge(
    :LIST_ITEM_TYPE_BLOCK => "<%s%s>%s</%s>#{$/}",
    :EMPTY_BLOCK => "<%s%s />#{$/}"
  )

  TagFormats = assign_tagformats
end
|
162
|
+
|
163
|
+
# Element that renders itself as HTML5 in XML syntax. The tags that the
# superclass replaces by <div> are laid out as block elements here; only
# "main" is listed in this class's Html5Tags.
class Xhtml5Element < XhtmlElement
  DOCTYPE = [
    '<?xml version="1.0" encoding="%s"?>',
    '<!DOCTYPE html>'
  ].map {|line| line + $/ }.join

  # The block level tags are extended by the superclass's Html5Tags.
  # This must be evaluated before Html5Tags is redefined below.
  ELEMENT_TYPES = superclass::ELEMENT_TYPES.merge(
    :BLOCK => superclass::ELEMENT_TYPES[:BLOCK] + superclass::Html5Tags
  )
  Html5Tags = %w(main)

  TagFormats = assign_tagformats
end
|