aozora2html 0.7.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/bin/aozora2html +3 -50
  3. data/lib/aozora2html.rb +0 -4
  4. data/lib/aozora2html/accent_parser.rb +91 -0
  5. data/lib/aozora2html/error.rb +16 -0
  6. data/lib/aozora2html/header.rb +139 -0
  7. data/lib/aozora2html/i18n.rb +17 -0
  8. data/lib/aozora2html/ruby_buffer.rb +81 -0
  9. data/lib/aozora2html/style_stack.rb +27 -0
  10. data/lib/aozora2html/tag.rb +55 -0
  11. data/lib/aozora2html/tag/accent.rb +39 -0
  12. data/lib/aozora2html/tag/block.rb +19 -0
  13. data/lib/aozora2html/tag/chitsuki.rb +15 -0
  14. data/lib/aozora2html/tag/dakuten_katakana.rb +23 -0
  15. data/lib/aozora2html/tag/decorate.rb +18 -0
  16. data/lib/aozora2html/tag/dir.rb +15 -0
  17. data/lib/aozora2html/tag/editor_note.rb +16 -0
  18. data/lib/aozora2html/tag/embed_gaiji.rb +48 -0
  19. data/lib/aozora2html/tag/font_size.rb +17 -0
  20. data/lib/aozora2html/tag/gaiji.rb +11 -0
  21. data/lib/aozora2html/tag/img.rb +21 -0
  22. data/lib/aozora2html/tag/indent.rb +8 -0
  23. data/lib/aozora2html/tag/inline.rb +13 -0
  24. data/lib/aozora2html/tag/inline_caption.rb +15 -0
  25. data/lib/aozora2html/tag/inline_font_size.rb +18 -0
  26. data/lib/aozora2html/tag/inline_keigakomi.rb +15 -0
  27. data/lib/aozora2html/tag/inline_yokogumi.rb +16 -0
  28. data/lib/aozora2html/tag/jisage.rb +15 -0
  29. data/lib/aozora2html/tag/jizume.rb +16 -0
  30. data/lib/aozora2html/tag/kaeriten.rb +15 -0
  31. data/lib/aozora2html/tag/keigakomi.rb +13 -0
  32. data/lib/aozora2html/tag/kunten.rb +12 -0
  33. data/lib/aozora2html/tag/midashi.rb +18 -0
  34. data/lib/aozora2html/tag/multiline.rb +6 -0
  35. data/lib/aozora2html/tag/multiline_caption.rb +15 -0
  36. data/lib/aozora2html/tag/multiline_chitsuki.rb +8 -0
  37. data/lib/aozora2html/tag/multiline_jisage.rb +8 -0
  38. data/lib/aozora2html/tag/multiline_midashi.rb +22 -0
  39. data/lib/aozora2html/tag/multiline_style.rb +17 -0
  40. data/lib/aozora2html/tag/multiline_yokogumi.rb +17 -0
  41. data/lib/aozora2html/tag/okurigana.rb +15 -0
  42. data/lib/aozora2html/tag/oneline_chitsuki.rb +9 -0
  43. data/lib/aozora2html/tag/oneline_indent.rb +2 -0
  44. data/lib/aozora2html/tag/oneline_jisage.rb +7 -0
  45. data/lib/aozora2html/tag/reference_mentioned.rb +46 -0
  46. data/lib/aozora2html/tag/ruby.rb +98 -0
  47. data/lib/aozora2html/tag/un_embed_gaiji.rb +24 -0
  48. data/lib/aozora2html/tag_parser.rb +53 -0
  49. data/lib/aozora2html/utils.rb +82 -0
  50. data/lib/aozora2html/version.rb +1 -1
  51. data/lib/aozora2html/yaml_loader.rb +31 -0
  52. data/lib/extensions.rb +31 -0
  53. data/lib/jstream.rb +68 -0
  54. data/lib/t2hs.rb +485 -1490
  55. data/test/test_aozora2html.rb +161 -57
  56. data/test/test_aozora_accent_parser.rb +13 -2
  57. data/test/test_command_parse.rb +213 -0
  58. data/test/test_dakuten_katakana_tag.rb +5 -4
  59. data/test/test_decorate_tag.rb +5 -4
  60. data/test/test_dir_tag.rb +5 -4
  61. data/test/test_editor_note_tag.rb +4 -4
  62. data/test/test_exception.rb +4 -4
  63. data/test/test_font_size_tag.rb +8 -7
  64. data/test/test_gaiji_tag.rb +14 -7
  65. data/test/test_header.rb +60 -0
  66. data/test/test_img_tag.rb +5 -4
  67. data/test/test_inline_caption_tag.rb +5 -4
  68. data/test/test_inline_font_size_tag.rb +7 -6
  69. data/test/test_inline_keigakomi_tag.rb +5 -4
  70. data/test/test_inline_yokogumi_tag.rb +5 -4
  71. data/test/test_jizume_tag.rb +7 -6
  72. data/test/test_jstream.rb +5 -5
  73. data/test/test_kaeriten_tag.rb +5 -4
  74. data/test/test_keigakomi_tag.rb +8 -7
  75. data/test/test_midashi_tag.rb +38 -0
  76. data/test/test_multiline_caption_tag.rb +7 -6
  77. data/test/test_multiline_midashi_tag.rb +12 -11
  78. data/test/test_multiline_style_tag.rb +7 -6
  79. data/test/test_multiline_yokogumi_tag.rb +7 -6
  80. data/test/test_okurigana_tag.rb +5 -4
  81. data/test/test_ruby_parse.rb +116 -0
  82. data/test/test_ruby_tag.rb +5 -4
  83. data/test/test_tag_parser.rb +10 -10
  84. data/yml/accent_table.yml +240 -0
  85. data/yml/command_table.yml +61 -0
  86. data/yml/jis2ucs.yml +11234 -0
  87. metadata +63 -6
  88. data/lib/accent_tag.rb +0 -23
  89. data/lib/aozora2html/jis2ucs.rb +0 -11237
  90. data/lib/embed_gaiji_tag.rb +0 -34
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 69f585eb774610409f4a56b1645bc2ebdf71ce35
4
- data.tar.gz: c2c28ed11b58ca3d6df952f3b5709d14184e48f4
3
+ metadata.gz: ecb4e24d58fac283904c18a996d7f19d308f4d69
4
+ data.tar.gz: 0e3738df57189ff06736a508a6aae1c79a34667e
5
5
  SHA512:
6
- metadata.gz: 9291546c4ff8e76d64e0e1eaa32474efcea5c6d952e8215841d2a0d1da0beccae37d01a4dd8d1689ebd9ff962b6511d1811e6e3b4d3b381be8d102ccd63356b4
7
- data.tar.gz: 23235e3da67c52b05cfb648275888ee82239f94e1f47d3a78261188ae4eaa110e1ca18dc5d9b8af1113e054e00f56586976fb5fb7fb99628175c2f0c2625d4ce
6
+ metadata.gz: 84e3a6e4e4dc1d274238a122001995226e0712657a6111080d410dbc1ba1d33e80f1df646b4936bf9553e701e85e0fa9d50ab37ec0afab51dda1fbcdaf04af65
7
+ data.tar.gz: fb2f60e2bd5a4f6ee9d0e9c4597915431b4d69c58420a0bd894efee97f49061f317e240a8dd151c1e90591fd82de55b14cc5e4bb37221c2c7fed57f82d85ca6a
@@ -4,53 +4,6 @@ require 'aozora2html'
4
4
  require 'optparse'
5
5
  require "tempfile"
6
6
 
7
- # override Aozora2Html#push_chars
8
- #
9
- # Original Aozora2Html#push_chars does not convert "'" into '&#39;'; it's the old behavior
10
- # of CGI.escapeHTML().
11
- #
12
- class Aozora2Html
13
- def push_chars(obj)
14
- if obj.is_a?(Array)
15
- obj.each{|x|
16
- push_chars(x)
17
- }
18
- elsif obj.is_a?(String)
19
- if obj.length == 1
20
- obj = obj.gsub(/[&\"<>]/, {'&' => '&amp;', '"' => '&quot;', '<' => '&lt;', '>' => '&gt;'})
21
- end
22
- obj.each_char{|x|
23
- push_char(x)
24
- }
25
- else
26
- push_char(obj)
27
- end
28
- end
29
-
30
- def dispatch_gaiji
31
- hook = @stream.peek_char(0)
32
- if hook == "[".encode("shift_jis")
33
- read_char
34
- # embed?
35
- command,raw = read_to_nest("]".encode("shift_jis"))
36
- try_emb = kuten2png(command)
37
- if try_emb != command
38
- try_emb
39
- elsif command.match(/U\+([0-9A-F]{4,5})/) && Embed_Gaiji_tag.use_unicode
40
- unicode_num = $1
41
- ch = Embed_Gaiji_tag.new(self, nil, nil, command)
42
- ch.unicode = unicode_num
43
- ch
44
- else
45
- # Unemb
46
- escape_gaiji(command)
47
- end
48
- else
49
- "※".encode("shift_jis")
50
- end
51
- end
52
- end
53
-
54
7
  opt = OptionParser.new("Usage: aozora2html [options] <text file> [<html file>]\n")
55
8
  opt.on('--gaiji-dir DIR', 'setting gaiji directory')
56
9
  opt.on('--css-files FILES', 'setting css directory')
@@ -68,12 +21,12 @@ if options["css-files"]
68
21
  end
69
22
 
70
23
  if options["use-jisx0213"]
71
- Embed_Gaiji_tag.use_jisx0213 = true
72
- Accent_tag.use_jisx0213 = true
24
+ Aozora2Html::Tag::EmbedGaiji.use_jisx0213 = true
25
+ Aozora2Html::Tag::Accent.use_jisx0213 = true
73
26
  end
74
27
 
75
28
  if options["use-unicode"]
76
- Embed_Gaiji_tag.use_unicode = true
29
+ Aozora2Html::Tag::EmbedGaiji.use_unicode = true
77
30
  end
78
31
 
79
32
  if ARGV.size < 1 || ARGV.size > 2
@@ -1,9 +1,5 @@
1
1
  require "aozora2html/version"
2
- require "aozora2html/zip"
3
- require "aozora2html/jis2ucs"
4
2
  require 't2hs'
5
- require 'embed_gaiji_tag'
6
- require 'accent_tag'
7
3
 
8
4
  ## already defined in t2hs.rb
9
5
  class Aozora2Html
@@ -0,0 +1,91 @@
1
+ # encoding: utf-8
2
+ require 'aozora2html/ruby_buffer'
class Aozora2Html
  # Recursive parser invocation used so that accent special characters
  # (the accent-decomposition notation inside tortoise-shell brackets) survive.
  class AccentParser < Aozora2Html
    # @param input   [Jstream] stream to read characters from
    # @param endchar [Object]  terminator handed over by the caller
    # @param chuuki  [Hash]    annotation table; `:accent` is set when an accent is found
    # @param image   [Object]  image list shared with the invoking parser
    # @raise [ArgumentError] when +input+ is not a Jstream
    def initialize(input, endchar, chuuki, image)
      unless input.is_a?(Jstream)
        raise ArgumentError, "tag_parser must supply Jstream as input"
      end

      @stream = input
      @buffer = []
      @ruby_buf = Aozora2Html::RubyBuffer.new
      @chuuki_table = chuuki
      @images = image       # items recording the global environment have to be shared
      @endchar = endchar    # a line break cannot be crossed; <br /> must not be emitted midway
      @closed = nil         # flag used to detect a forced retreat at a line break
      @encount_accent = nil
    end

    # Flushes the ruby buffer and returns the accumulated output as an array.
    #
    # When no accent was encountered the opening bracket is restored at the
    # front (and the closing bracket at the back if the span was closed).
    # When the span was never closed a line break is appended instead.
    def general_output
      @ruby_buf.dump(@buffer)
      @buffer.unshift("〔".encode("shift_jis")) unless @encount_accent
      if !@closed
        @buffer.push("<br />\r\n")
      elsif !@encount_accent
        @buffer.push("〕".encode("shift_jis"))
      end
      @buffer
    end

    # Reads one character, converts accent notation into an Accent tag where
    # the accent table matches, dispatches the special markers, and stores
    # the result. Throws :terminate at a line break or at the closing bracket.
    def parse
      first = read_char
      entry = Aozora2Html::ACCENT_TABLE[first]
      hit = entry && entry[@stream.peek_char(0)]
      if hit.is_a?(Hash)
        # Three-character notation: one more look-ahead selects the entry.
        ligature = hit[@stream.peek_char(1)]
        if ligature
          first = Aozora2Html::Tag::Accent.new(self, *ligature)
          @encount_accent = true
          @chuuki_table[:accent] = true
          2.times { read_char }
        end
      elsif hit
        # Two-character notation.
        first = Aozora2Html::Tag::Accent.new(self, *hit)
        @encount_accent = true
        read_char
        @chuuki_table[:accent] = true
      end

      case first
      when Aozora2Html::GAIJI_MARK
        first = dispatch_gaiji
      when "[".encode("shift_jis")
        first = dispatch_aozora_command
      when Aozora2Html::KU
        assign_kunoji
      when "《".encode("shift_jis")
        first = apply_ruby
      end

      if first == "\r\n"
        # A line break inside the brackets: warn only if an accent was already converted.
        puts "警告(#{line_number}行目):アクセント分解の亀甲括弧の始めと終わりが、行中で揃っていません".encode("shift_jis") if @encount_accent
        throw :terminate
      elsif first == "〕".encode("shift_jis")
        @closed = true
        throw :terminate
      elsif first == RUBY_PREFIX
        @ruby_buf.dump(@buffer)
        @ruby_buf.protected = true
      elsif first != "" && first != nil
        illegal_char_check(first, line_number)
        push_chars(first)
      end
    end

    # Runs #parse until :terminate is thrown, then returns #general_output.
    def process
      catch(:terminate) { loop { parse } }
      general_output
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require "aozora2html/i18n"
2
+
# Exception class for aozora2html.
class Aozora2Html
  class Error < StandardError
    # @param message [String, nil] raw description of the problem
    def initialize(message = nil)
      # Hand the text to StandardError as well, so that #to_s, #inspect and
      # Ruby's own error reporting can see it.
      super(message)
      @message = message
    end

    # Returns the error text.
    #
    # @param line [Integer, nil] line number where processing stopped
    # @return [String] with +line+, the `:error_stop` message built by
    #   I18n.t from the line number and the raw text; without it, the plain
    #   text exactly as StandardError#message reports it
    def message(line = nil)
      # Exception#message is called without arguments by Ruby itself
      # (e.g. when printing an uncaught exception), so that form must work.
      return super() if line.nil?

      I18n.t(:error_stop, line, @message)
    end
  end
end
16
+
@@ -0,0 +1,139 @@
1
+ # encoding: utf-8
class Aozora2Html
  # Collects the leading header lines of a text and renders them as the
  # XHTML prologue (<head>, title block and the opening of the main text).
  class Header
    def initialize
      @header = []
    end

    # Appends one header line.
    def push(line)
      @header.push(line)
    end

    # Renders one <h2> metadata line.
    #
    # @param hash      [Hash]   header info as built by #build_header_info
    # @param attr      [Symbol] key to look up; also used as the CSS class
    # @param true_name [String, nil] CSS class to use instead of +attr+
    # @return [String] the <h2> element, or "" when the key has no value
    def out_header_info(hash, attr, true_name = nil)
      found = hash[attr]
      return "" unless found

      "<h2 class=\"#{true_name || attr}\">#{found}</h2>\r\n"
    end

    # Classifies a header line.
    #
    # Each character is turned into the hex dump of its bytes and compared,
    # as a string, against three ranges. The original comments label them
    # "1byte", "1-1, 3-25" and "6-1, 7-81"; this presumably assumes
    # Shift_JIS encoded input -- confirm against the caller.
    #
    # @return [Symbol, nil] :original when every character is inside the
    #   ranges (also for an empty string), otherwise :editor, :henyaku or
    #   :translator according to the first matching pattern, or nil
    def header_element_type(string)
      original = string.each_char.all? do |x|
        code = x.unpack("H*")[0]
        ("00" <= code && code <= "7f") ||     # 1byte
          ("8140" <= code && code <= "8258") || # 1-1, 3-25
          ("839f" <= code && code <= "8491")    # 6-1, 7-81
      end

      if original
        :original
      elsif string.match(PAT_EDITOR)
        :editor
      elsif string.match(PAT_HENYAKU)
        :henyaku
      elsif string.match(PAT_TRANSLATOR)
        :translator
      end
    end

    # Stores +string+ in +header_info+ under the role detected by
    # #header_element_type; anything that is not an editor, translator or
    # henyaku line is stored as the author.
    #
    # @return [Symbol] the key that was written
    def process_person(string, header_info)
      type = header_element_type(string)
      case type
      when :editor
        header_info[:editor] = string
      when :translator
        header_info[:translator] = string
      when :henyaku
        header_info[:henyaku] = string
      else
        type = :author
        header_info[:author] = string
      end
      type
    end

    # Builds the <title> element from the present header fields, joined
    # with single spaces in a fixed order.
    def build_title(header_info)
      buf = [:author, :translator, :editor, :henyaku,
             :title, :original_title,
             :subtitle, :original_subtitle].map { |item| header_info[item] }.compact
      buf_str = buf.join(" ")
      "<title>#{buf_str}</title>"
    end

    # Interprets the collected lines according to how many there are.
    # The first line is always the title; other counts than 2..6 yield the
    # title only.
    #
    # @return [Hash] header fields keyed by symbol
    # @raise [Aozora2Html::Error] when a 5 or 6 line header ends with a line
    #   that is classified as an author although one was already assigned
    def build_header_info
      header_info = {:title => @header[0]}
      case @header.length
      when 2
        process_person(@header[1], header_info)
      when 3
        if header_element_type(@header[1]) == :original
          header_info[:original_title] = @header[1]
          process_person(@header[2], header_info)
        elsif process_person(@header[2], header_info) == :author
          header_info[:subtitle] = @header[1]
        else
          header_info[:author] = @header[1]
        end
      when 4
        if header_element_type(@header[1]) == :original
          header_info[:original_title] = @header[1]
        else
          header_info[:subtitle] = @header[1]
        end
        if process_person(@header[3], header_info) == :author
          header_info[:subtitle] = @header[2]
        else
          header_info[:author] = @header[2]
        end
      when 5
        header_info[:original_title] = @header[1]
        header_info[:subtitle] = @header[2]
        header_info[:author] = @header[3]
        if process_person(@header[4], header_info) == :author
          raise Aozora2Html::Error, "parser encounted author twice"
        end
      when 6
        header_info[:original_title] = @header[1]
        header_info[:subtitle] = @header[2]
        header_info[:original_subtitle] = @header[3]
        header_info[:author] = @header[4]
        if process_person(@header[5], header_info) == :author
          raise Aozora2Html::Error, "parser encounted author twice"
        end
      end
      header_info
    end

    # Renders the whole prologue as one string.
    #
    # Stylesheets are taken from the global $css_files. An unset (nil)
    # global is treated as "no stylesheets" instead of raising.
    def to_html
      header_info = build_header_info

      # Build the <title> line.
      html_title = build_title(header_info)

      # Output.
      out_buf = []
      out_buf.push("<?xml version=\"1.0\" encoding=\"Shift_JIS\"?>\r\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\r\n \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"ja\" >\r\n<head>\r\n <meta http-equiv=\"Content-Type\" content=\"text/html;charset=Shift_JIS\" />\r\n <meta http-equiv=\"content-style-type\" content=\"text/css\" />\r\n")
      Array($css_files).each do |css|
        out_buf.push("\t<link rel=\"stylesheet\" type=\"text/css\" href=\"" + css + "\" />\r\n")
      end
      out_buf.push("\t#{html_title}\r\n <script type=\"text/javascript\" src=\"../../jquery-1.4.2.min.js\"></script>\r\n <link rel=\"Schema.DC\" href=\"http://purl.org/dc/elements/1.1/\" />\r\n <meta name=\"DC.Title\" content=\"#{header_info[:title]}\" />\r\n <meta name=\"DC.Creator\" content=\"#{header_info[:author]}\" />\r\n <meta name=\"DC.Publisher\" content=\"#{AOZORABUNKO}\" />\r\n</head>\r\n<body>\r\n<div class=\"metadata\">\r\n")
      out_buf.push("<h1 class=\"title\">#{header_info[:title]}</h1>\r\n" +
                   out_header_info(header_info, :original_title) +
                   out_header_info(header_info, :subtitle) +
                   out_header_info(header_info, :original_subtitle) +
                   out_header_info(header_info, :author) +
                   out_header_info(header_info, :editor) +
                   out_header_info(header_info, :translator) +
                   out_header_info(header_info, :henyaku, "editor-translator"))
      out_buf.push("<br />\r\n<br />\r\n</div>\r\n<div id=\"contents\" style=\"display:none\"></div><div class=\"main_text\">")
      out_buf.join("")
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
class Aozora2Html
  # Message catalogue for warnings and errors.
  class I18n
    # Message templates keyed by symbol. Templates may contain format
    # directives (see :error_stop) that are filled in by .t.
    MSG = {
      :tag_syntax_error => "注記を重ねる際の原則、「狭い範囲を先に、広い範囲を後に」が守られていません。リンク先の指針を参考に、書き方をあらためてください",
      :undefined_header => "未定義な見出しです",
      :use_crlf => "改行コードを、「CR+LF」にあらためてください",
      :error_stop => "エラー(%d行目):%s. \r\n処理を停止します",
      :invalid_font_size => "文字サイズの指定が不正です",
      :unsupported_ruby => "サポートされていない複雑なルビ付けです"
    }.freeze

    # Looks up a message, converts it to Shift_JIS and applies +args+ to it
    # with String#%.
    #
    # @param msg  [Symbol] key in MSG
    # @param args [Array]  values for the format directives of the template
    # @return [String] the formatted message in Shift_JIS
    # @raise [KeyError] when +msg+ is not a known key (the error names the
    #   key, instead of failing on nil)
    def self.t(msg, *args)
      MSG.fetch(msg).encode("shift_jis") % args
    end
  end
end
@@ -0,0 +1,81 @@
class Aozora2Html
  # Holding area for characters that may still receive ruby annotation.
  # It wraps an array whose elements are appended to the main output buffer
  # by #dump.
  class RubyBuffer
    # Set to a true value when a ruby prefix mark has been seen; guards the
    # base text of the ruby. #dump re-emits the prefix when it is set.
    attr_accessor :protected

    # char_type of the characters held in the buffer.
    attr_accessor :char_type

    # @param item [Object, nil] optional first element (see #clear)
    def initialize(item = nil)
      clear(item)
    end

    # Resets the buffer. With +item+ the buffer holds just that element,
    # otherwise a single empty string. Also resets #protected and #char_type.
    def clear(item = nil)
      # The empty string is appended to in place by #last_concat, so it must
      # be a mutable copy even when string literals are frozen.
      @ruby_buf = item ? [item] : ["".dup]
      @protected = nil
      @char_type = nil
    end

    # True when the underlying array has no elements. A freshly cleared
    # buffer always holds one element, so this is false right after #clear.
    def empty?
      @ruby_buf.empty?
    end

    # Negation of #empty?.
    def present?
      !empty?
    end

    # Returns the underlying array itself (not a copy).
    def to_a
      @ruby_buf
    end

    # Iterates over the buffered elements.
    def each(&block)
      @ruby_buf.each(&block)
    end

    # Returns the last buffered element.
    def last
      @ruby_buf.last
    end

    # Appends a new element.
    def push(item)
      @ruby_buf.push(item)
    end

    # Number of buffered elements.
    def length
      @ruby_buf.length
    end

    # Appends +item+ to the last element in place; the caller is expected
    # to have checked #last_is_string? first.
    def last_concat(item)
      @ruby_buf.last.concat(item)
    end

    # True when the last element is a String.
    def last_is_string?
      @ruby_buf.last.is_a?(String)
    end

    # Moves the buffered elements to the end of +buffer+ and resets this
    # object.
    #
    # If #protected is set, a ruby prefix is put in front first. When both
    # the first buffered element and the last element of +buffer+ are
    # strings they are merged into one string (mutating +buffer+'s last
    # element).
    #
    # @param buffer [Array] destination; modified in place
    # @return [Array] +buffer+
    def dump(buffer)
      if @protected
        # Push a copy: +buffer+'s strings get mutated via concat later on,
        # and the shared constant must never end up being one of them.
        @ruby_buf.unshift(RUBY_PREFIX.dup)
        @protected = nil
      end
      top = @ruby_buf[0]
      if top.is_a?(String) && buffer.last.is_a?(String)
        buffer.last.concat(top)
        buffer.concat(@ruby_buf.drop(1))
      else
        buffer.concat(@ruby_buf)
      end
      clear
      buffer
    end
  end
end
81
+
@@ -0,0 +1,27 @@
class Aozora2Html
  # Thin stack wrapper around an Array.
  class StyleStack
    def initialize
      @stack = []
    end

    # Pushes +elem+ on top of the stack.
    def push(elem)
      @stack.push(elem)
    end

    # True when nothing is on the stack.
    def empty?
      @stack.empty?
    end

    # Removes and returns the top element, or nil when the stack is empty.
    def pop
      @stack.pop
    end

    # Returns the top element without removing it, or nil when empty.
    def last
      @stack.last
    end

    # Returns element 0 of the top entry (presumably entries are
    # [command, closing tag] pairs -- confirm against the callers), or nil
    # when the stack is empty, matching what #last and #pop do.
    def last_command
      top = @stack.last
      top.nil? ? nil : top[0]
    end
  end
end