aozora2html 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/bin/aozora2html +3 -50
  3. data/lib/aozora2html.rb +0 -4
  4. data/lib/aozora2html/accent_parser.rb +91 -0
  5. data/lib/aozora2html/error.rb +16 -0
  6. data/lib/aozora2html/header.rb +139 -0
  7. data/lib/aozora2html/i18n.rb +17 -0
  8. data/lib/aozora2html/ruby_buffer.rb +81 -0
  9. data/lib/aozora2html/style_stack.rb +27 -0
  10. data/lib/aozora2html/tag.rb +55 -0
  11. data/lib/aozora2html/tag/accent.rb +39 -0
  12. data/lib/aozora2html/tag/block.rb +19 -0
  13. data/lib/aozora2html/tag/chitsuki.rb +15 -0
  14. data/lib/aozora2html/tag/dakuten_katakana.rb +23 -0
  15. data/lib/aozora2html/tag/decorate.rb +18 -0
  16. data/lib/aozora2html/tag/dir.rb +15 -0
  17. data/lib/aozora2html/tag/editor_note.rb +16 -0
  18. data/lib/aozora2html/tag/embed_gaiji.rb +48 -0
  19. data/lib/aozora2html/tag/font_size.rb +17 -0
  20. data/lib/aozora2html/tag/gaiji.rb +11 -0
  21. data/lib/aozora2html/tag/img.rb +21 -0
  22. data/lib/aozora2html/tag/indent.rb +8 -0
  23. data/lib/aozora2html/tag/inline.rb +13 -0
  24. data/lib/aozora2html/tag/inline_caption.rb +15 -0
  25. data/lib/aozora2html/tag/inline_font_size.rb +18 -0
  26. data/lib/aozora2html/tag/inline_keigakomi.rb +15 -0
  27. data/lib/aozora2html/tag/inline_yokogumi.rb +16 -0
  28. data/lib/aozora2html/tag/jisage.rb +15 -0
  29. data/lib/aozora2html/tag/jizume.rb +16 -0
  30. data/lib/aozora2html/tag/kaeriten.rb +15 -0
  31. data/lib/aozora2html/tag/keigakomi.rb +13 -0
  32. data/lib/aozora2html/tag/kunten.rb +12 -0
  33. data/lib/aozora2html/tag/midashi.rb +18 -0
  34. data/lib/aozora2html/tag/multiline.rb +6 -0
  35. data/lib/aozora2html/tag/multiline_caption.rb +15 -0
  36. data/lib/aozora2html/tag/multiline_chitsuki.rb +8 -0
  37. data/lib/aozora2html/tag/multiline_jisage.rb +8 -0
  38. data/lib/aozora2html/tag/multiline_midashi.rb +22 -0
  39. data/lib/aozora2html/tag/multiline_style.rb +17 -0
  40. data/lib/aozora2html/tag/multiline_yokogumi.rb +17 -0
  41. data/lib/aozora2html/tag/okurigana.rb +15 -0
  42. data/lib/aozora2html/tag/oneline_chitsuki.rb +9 -0
  43. data/lib/aozora2html/tag/oneline_indent.rb +2 -0
  44. data/lib/aozora2html/tag/oneline_jisage.rb +7 -0
  45. data/lib/aozora2html/tag/reference_mentioned.rb +46 -0
  46. data/lib/aozora2html/tag/ruby.rb +98 -0
  47. data/lib/aozora2html/tag/un_embed_gaiji.rb +24 -0
  48. data/lib/aozora2html/tag_parser.rb +53 -0
  49. data/lib/aozora2html/utils.rb +82 -0
  50. data/lib/aozora2html/version.rb +1 -1
  51. data/lib/aozora2html/yaml_loader.rb +31 -0
  52. data/lib/extensions.rb +31 -0
  53. data/lib/jstream.rb +68 -0
  54. data/lib/t2hs.rb +485 -1490
  55. data/test/test_aozora2html.rb +161 -57
  56. data/test/test_aozora_accent_parser.rb +13 -2
  57. data/test/test_command_parse.rb +213 -0
  58. data/test/test_dakuten_katakana_tag.rb +5 -4
  59. data/test/test_decorate_tag.rb +5 -4
  60. data/test/test_dir_tag.rb +5 -4
  61. data/test/test_editor_note_tag.rb +4 -4
  62. data/test/test_exception.rb +4 -4
  63. data/test/test_font_size_tag.rb +8 -7
  64. data/test/test_gaiji_tag.rb +14 -7
  65. data/test/test_header.rb +60 -0
  66. data/test/test_img_tag.rb +5 -4
  67. data/test/test_inline_caption_tag.rb +5 -4
  68. data/test/test_inline_font_size_tag.rb +7 -6
  69. data/test/test_inline_keigakomi_tag.rb +5 -4
  70. data/test/test_inline_yokogumi_tag.rb +5 -4
  71. data/test/test_jizume_tag.rb +7 -6
  72. data/test/test_jstream.rb +5 -5
  73. data/test/test_kaeriten_tag.rb +5 -4
  74. data/test/test_keigakomi_tag.rb +8 -7
  75. data/test/test_midashi_tag.rb +38 -0
  76. data/test/test_multiline_caption_tag.rb +7 -6
  77. data/test/test_multiline_midashi_tag.rb +12 -11
  78. data/test/test_multiline_style_tag.rb +7 -6
  79. data/test/test_multiline_yokogumi_tag.rb +7 -6
  80. data/test/test_okurigana_tag.rb +5 -4
  81. data/test/test_ruby_parse.rb +116 -0
  82. data/test/test_ruby_tag.rb +5 -4
  83. data/test/test_tag_parser.rb +10 -10
  84. data/yml/accent_table.yml +240 -0
  85. data/yml/command_table.yml +61 -0
  86. data/yml/jis2ucs.yml +11234 -0
  87. metadata +63 -6
  88. data/lib/accent_tag.rb +0 -23
  89. data/lib/aozora2html/jis2ucs.rb +0 -11237
  90. data/lib/embed_gaiji_tag.rb +0 -34
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 69f585eb774610409f4a56b1645bc2ebdf71ce35
4
- data.tar.gz: c2c28ed11b58ca3d6df952f3b5709d14184e48f4
3
+ metadata.gz: ecb4e24d58fac283904c18a996d7f19d308f4d69
4
+ data.tar.gz: 0e3738df57189ff06736a508a6aae1c79a34667e
5
5
  SHA512:
6
- metadata.gz: 9291546c4ff8e76d64e0e1eaa32474efcea5c6d952e8215841d2a0d1da0beccae37d01a4dd8d1689ebd9ff962b6511d1811e6e3b4d3b381be8d102ccd63356b4
7
- data.tar.gz: 23235e3da67c52b05cfb648275888ee82239f94e1f47d3a78261188ae4eaa110e1ca18dc5d9b8af1113e054e00f56586976fb5fb7fb99628175c2f0c2625d4ce
6
+ metadata.gz: 84e3a6e4e4dc1d274238a122001995226e0712657a6111080d410dbc1ba1d33e80f1df646b4936bf9553e701e85e0fa9d50ab37ec0afab51dda1fbcdaf04af65
7
+ data.tar.gz: fb2f60e2bd5a4f6ee9d0e9c4597915431b4d69c58420a0bd894efee97f49061f317e240a8dd151c1e90591fd82de55b14cc5e4bb37221c2c7fed57f82d85ca6a
@@ -4,53 +4,6 @@ require 'aozora2html'
4
4
  require 'optparse'
5
5
  require "tempfile"
6
6
 
7
- # override Aozora2Html#push_chars
8
- #
9
- # Original Aozora2Html#push_chars does not convert "'" into '&#39;'; it's the old behavior
10
- # of CGI.escapeHTML().
11
- #
12
- class Aozora2Html
13
- def push_chars(obj)
14
- if obj.is_a?(Array)
15
- obj.each{|x|
16
- push_chars(x)
17
- }
18
- elsif obj.is_a?(String)
19
- if obj.length == 1
20
- obj = obj.gsub(/[&\"<>]/, {'&' => '&amp;', '"' => '&quot;', '<' => '&lt;', '>' => '&gt;'})
21
- end
22
- obj.each_char{|x|
23
- push_char(x)
24
- }
25
- else
26
- push_char(obj)
27
- end
28
- end
29
-
30
- def dispatch_gaiji
31
- hook = @stream.peek_char(0)
32
- if hook == "[".encode("shift_jis")
33
- read_char
34
- # embed?
35
- command,raw = read_to_nest("]".encode("shift_jis"))
36
- try_emb = kuten2png(command)
37
- if try_emb != command
38
- try_emb
39
- elsif command.match(/U\+([0-9A-F]{4,5})/) && Embed_Gaiji_tag.use_unicode
40
- unicode_num = $1
41
- ch = Embed_Gaiji_tag.new(self, nil, nil, command)
42
- ch.unicode = unicode_num
43
- ch
44
- else
45
- # Unemb
46
- escape_gaiji(command)
47
- end
48
- else
49
- "※".encode("shift_jis")
50
- end
51
- end
52
- end
53
-
54
7
  opt = OptionParser.new("Usage: aozora2html [options] <text file> [<html file>]\n")
55
8
  opt.on('--gaiji-dir DIR', 'setting gaiji directory')
56
9
  opt.on('--css-files FILES', 'setting css directory')
@@ -68,12 +21,12 @@ if options["css-files"]
68
21
  end
69
22
 
70
23
  if options["use-jisx0213"]
71
- Embed_Gaiji_tag.use_jisx0213 = true
72
- Accent_tag.use_jisx0213 = true
24
+ Aozora2Html::Tag::EmbedGaiji.use_jisx0213 = true
25
+ Aozora2Html::Tag::Accent.use_jisx0213 = true
73
26
  end
74
27
 
75
28
  if options["use-unicode"]
76
- Embed_Gaiji_tag.use_unicode = true
29
+ Aozora2Html::Tag::EmbedGaiji.use_unicode = true
77
30
  end
78
31
 
79
32
  if ARGV.size < 1 || ARGV.size > 2
@@ -1,9 +1,5 @@
1
1
  require "aozora2html/version"
2
- require "aozora2html/zip"
3
- require "aozora2html/jis2ucs"
4
2
  require 't2hs'
5
- require 'embed_gaiji_tag'
6
- require 'accent_tag'
7
3
 
8
4
  ## already defined in t2hs.rb
9
5
  class Aozora2Html
@@ -0,0 +1,91 @@
1
+ # encoding: utf-8
2
+ require 'aozora2html/ruby_buffer'
3
class Aozora2Html

  # Recursive invocation used to keep accent special characters alive.
  #
  # Parses characters from the shared stream until a closing "〕" or a line
  # break ("\r\n") is read, collecting the result in its own buffer.
  class AccentParser < Aozora2Html

    # @param input [Jstream] stream to read from; anything else raises ArgumentError
    # @param endchar stored in @endchar (not otherwise used in this class)
    # @param chuuki table that receives the :accent flag when an accent is found
    # @param image stored in @images (not otherwise used in this class)
    # @raise [ArgumentError] when input is not a Jstream
    def initialize(input, endchar, chuuki, image)
      if not(input.is_a?(Jstream))
        raise ArgumentError, "tag_parser must supply Jstream as input"
      end
      @stream = input
      @buffer = []
      @ruby_buf = Aozora2Html::RubyBuffer.new
      @chuuki_table = chuuki
      @images = image # items that record the global environment must be shared
      @endchar = endchar # a line break cannot be crossed; <br /> cannot be emitted here
      @closed = nil # flag for checking a forced exit at a line break
      @encount_accent = nil
    end

    # Flushes the ruby buffer into @buffer and finalizes it.
    #
    # When no accent was encountered, the opening "〔" is put back at the
    # front, and the closing "〕" is appended if the bracket was closed.
    # When the bracket was not closed, "<br />\r\n" is appended instead.
    #
    # @return [Array] the output is returned as an array
    def general_output # the output is returned as an array
      @ruby_buf.dump(@buffer)
      if !@encount_accent
        @buffer.unshift("〔".encode("shift_jis"))
      end
      if @closed and !@encount_accent
        @buffer.push("〕".encode("shift_jis"))
      elsif not(@closed)
        @buffer.push("<br />\r\n")
      end
      @buffer
    end

    # Reads one character and processes it.
    #
    # The character is first looked up in ACCENT_TABLE together with the next
    # one or two characters peeked from the stream; on a hit it is replaced by
    # a Tag::Accent, the peeked characters are consumed, and the :accent flag
    # is set in the chuuki table. The result is then dispatched on gaiji marks,
    # "[" commands, KU and "《" ruby, and finally either terminates parsing
    # (via throw :terminate) or is pushed to the output.
    def parse
      first = read_char
      if found = Aozora2Html::ACCENT_TABLE[first]
        if found2 = found[@stream.peek_char(0)]
          if found2.is_a?(Hash)
            # three-character sequence: the third character selects the entry
            if found3 = found2[@stream.peek_char(1)]
              first = Aozora2Html::Tag::Accent.new(self, *found3)
              @encount_accent = true
              @chuuki_table[:accent] = true
              read_char
              read_char
            end
          elsif found2
            # two-character sequence
            first = Aozora2Html::Tag::Accent.new(self, *found2)
            @encount_accent = true
            read_char
            @chuuki_table[:accent] = true
          end
        end
      end
      case first
      when Aozora2Html::GAIJI_MARK
        first = dispatch_gaiji
      when "[".encode("shift_jis")
        first = dispatch_aozora_command
      when Aozora2Html::KU
        assign_kunoji
      when "《".encode("shift_jis")
        first = apply_ruby
      end
      if first == "\r\n"
        # line ended before the bracket was closed; warn only if an accent was seen
        if @encount_accent
          puts "警告(#{line_number}行目):アクセント分解の亀甲括弧の始めと終わりが、行中で揃っていません".encode("shift_jis")
        end
        throw :terminate
      elsif first == "〕".encode("shift_jis")
        @closed = true
        throw :terminate
      elsif first == RUBY_PREFIX
        @ruby_buf.dump(@buffer)
        @ruby_buf.protected = true
      elsif first != "" and first != nil
        illegal_char_check(first, line_number)
        push_chars(first)
      end
    end

    # Runs parse repeatedly until :terminate is thrown, then returns the
    # result of general_output.
    def process
      catch(:terminate) do
        loop do
          parse
        end
      end
      general_output
    end
  end
end
@@ -0,0 +1,16 @@
1
+ require "aozora2html/i18n"
2
+
3
# Exception class
class Aozora2Html
  # Error raised by Aozora2Html.
  #
  # The raw message is kept so that it can later be combined with a line
  # number through the :error_stop template of I18n.
  class Error < StandardError

    # @param message [String] raw (unformatted) error message
    def initialize(message)
      # Hand the message to StandardError as well, so that #to_s, #inspect
      # and #full_message report it instead of just the class name.
      super(message)
      @message = message
    end

    # Returns the error message.
    #
    # Exception#message is conventionally called without arguments (by the
    # interpreter, loggers and test frameworks), so +line+ is optional: with
    # no line number the raw message is returned unchanged.
    #
    # @param line [Integer, nil] line number where the error occurred
    # @return [String] the raw message when +line+ is nil, otherwise the
    #   message formatted by I18n.t(:error_stop, line, message)
    def message(line = nil)
      return @message if line.nil?

      I18n.t(:error_stop, line, @message)
    end
  end
end
16
+
@@ -0,0 +1,139 @@
1
+ # encoding: utf-8
2
class Aozora2Html
  # Collects the header lines of a work and turns them into the XHTML
  # preamble (document head plus the "metadata" block).
  class Header
    def initialize
      @header = []
    end

    # Appends one header line.
    def push(line)
      @header << line
    end

    # Renders one <h2> metadata line.
    #
    # @param hash header information
    # @param attr [Symbol] key to look up; also the CSS class unless true_name is given
    # @param true_name [String, nil] CSS class to use instead of attr
    # @return [String] the <h2> element, or "" when the key has no value
    def out_header_info(hash, attr, true_name = nil)
      value = hash[attr]
      return "" unless value

      "<h2 class=\"#{true_name || attr}\">#{value}</h2>\r\n"
    end

    # Classifies a header line.
    #
    # @return [Symbol, nil] :original when every character falls in one of the
    #   byte ranges below, otherwise :editor / :henyaku / :translator according
    #   to the first matching pattern, or nil when nothing matches
    def header_element_type(string)
      only_listed_ranges = string.each_char.all? { |ch|
        hex = ch.unpack("H*")[0]
        hex.between?("00", "7f") ||        # 1byte
          hex.between?("8140", "8258") ||  # 1-1, 3-25
          hex.between?("839f", "8491")     # 6-1, 7-81
      }
      return :original if only_listed_ranges
      return :editor if string.match(PAT_EDITOR)
      return :henyaku if string.match(PAT_HENYAKU)
      return :translator if string.match(PAT_TRANSLATOR)

      nil
    end

    # Stores a person line in header_info under the key matching its type;
    # anything that is not an editor, translator or henyaku line is the author.
    #
    # @return [Symbol] the key the line was stored under
    def process_person(string, header_info)
      role = header_element_type(string)
      role = :author unless [:editor, :translator, :henyaku].include?(role)
      header_info[role] = string
      role
    end

    # Builds the <title> element from the available header fields.
    def build_title(header_info)
      ordered_keys = [:author, :translator, :editor, :henyaku,
                      :title, :original_title,
                      :subtitle, :original_subtitle]
      present = ordered_keys.map { |key| header_info[key] }.compact
      "<title>#{present.join(" ")}</title>"
    end

    # Interprets the collected header lines according to how many there are.
    #
    # @return [Hash] header information keyed by symbol
    # @raise [Aozora2Html::Error] when a 5- or 6-line header ends with a line
    #   that is again classified as the author
    def build_header_info
      header_info = {:title => @header[0]}
      case @header.length
      when 2
        process_person(@header[1], header_info)
      when 3
        if header_element_type(@header[1]) == :original
          header_info[:original_title] = @header[1]
          process_person(@header[2], header_info)
        elsif process_person(@header[2], header_info) == :author
          header_info[:subtitle] = @header[1]
        else
          header_info[:author] = @header[1]
        end
      when 4
        second_key = header_element_type(@header[1]) == :original ? :original_title : :subtitle
        header_info[second_key] = @header[1]
        third_key = process_person(@header[3], header_info) == :author ? :subtitle : :author
        header_info[third_key] = @header[2]
      when 5
        header_info.update(:original_title => @header[1],
                           :subtitle => @header[2],
                           :author => @header[3])
        if process_person(@header[4], header_info) == :author
          raise Aozora2Html::Error, "parser encounted author twice"
        end
      when 6
        header_info.update(:original_title => @header[1],
                           :subtitle => @header[2],
                           :original_subtitle => @header[3],
                           :author => @header[4])
        if process_person(@header[5], header_info) == :author
          raise Aozora2Html::Error, "parser encounted author twice"
        end
      end
      header_info
    end

    # Renders the document head and the metadata block.
    #
    # @return [String] XHTML up to and including the opening main_text div
    def to_html
      header_info = build_header_info

      # build the <title> line
      html_title = build_title(header_info)

      # output
      out_buf = []
      out_buf << "<?xml version=\"1.0\" encoding=\"Shift_JIS\"?>\r\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\r\n \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"ja\" >\r\n<head>\r\n <meta http-equiv=\"Content-Type\" content=\"text/html;charset=Shift_JIS\" />\r\n <meta http-equiv=\"content-style-type\" content=\"text/css\" />\r\n"
      $css_files.each { |css|
        out_buf << ("\t<link rel=\"stylesheet\" type=\"text/css\" href=\"" + css + "\" />\r\n")
      }
      out_buf << "\t#{html_title}\r\n <script type=\"text/javascript\" src=\"../../jquery-1.4.2.min.js\"></script>\r\n <link rel=\"Schema.DC\" href=\"http://purl.org/dc/elements/1.1/\" />\r\n <meta name=\"DC.Title\" content=\"#{header_info[:title]}\" />\r\n <meta name=\"DC.Creator\" content=\"#{header_info[:author]}\" />\r\n <meta name=\"DC.Publisher\" content=\"#{AOZORABUNKO}\" />\r\n</head>\r\n<body>\r\n<div class=\"metadata\">\r\n"
      metadata = "<h1 class=\"title\">#{header_info[:title]}</h1>\r\n" +
                 out_header_info(header_info, :original_title) +
                 out_header_info(header_info, :subtitle) +
                 out_header_info(header_info, :original_subtitle) +
                 out_header_info(header_info, :author) +
                 out_header_info(header_info, :editor) +
                 out_header_info(header_info, :translator) +
                 out_header_info(header_info, :henyaku, "editor-translator")
      out_buf << metadata
      out_buf << "<br />\r\n<br />\r\n</div>\r\n<div id=\"contents\" style=\"display:none\"></div><div class=\"main_text\">"
      out_buf.join("")
    end

  end
end
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
class Aozora2Html
  # Message catalogue for Aozora2Html.
  class I18n
    # Message templates keyed by symbol. Frozen so the catalogue cannot be
    # modified by accident at runtime.
    MSG = {
      :tag_syntax_error => "注記を重ねる際の原則、「狭い範囲を先に、広い範囲を後に」が守られていません。リンク先の指針を参考に、書き方をあらためてください",
      :undefined_header => "未定義な見出しです",
      :use_crlf => "改行コードを、「CR+LF」にあらためてください",
      :error_stop => "エラー(%d行目):%s. \r\n処理を停止します",
      :invalid_font_size => "文字サイズの指定が不正です",
      :unsupported_ruby => "サポートされていない複雑なルビ付けです"
    }.freeze

    # Looks up a message template, converts it to Shift_JIS and fills in the
    # format arguments.
    #
    # @param msg [Symbol] key in MSG
    # @param args values for the format directives of the template
    # @return [String] the formatted message, encoded as Shift_JIS
    # @raise [KeyError] when msg is not a known key (fetch is used so the
    #   failure names the missing key instead of failing on nil)
    def self.t(msg, *args)
      MSG.fetch(msg).encode("shift_jis") % args
    end
  end
end
@@ -0,0 +1,81 @@
1
class Aozora2Html
  # Array-backed buffer holding the pieces of text collected for ruby
  # processing.
  class RubyBuffer

    # Set to true when `|` is encountered. Guards the ruby base text.
    attr_accessor :protected

    # char_type of the characters held in @ruby_buf
    attr_accessor :char_type

    def initialize(item=nil)
      clear(item)
    end

    # Resets the buffer. With an item the buffer becomes a one-element buffer
    # holding it; without one it becomes a one-element buffer holding `""`.
    # Both flags are reset to nil.
    def clear(item=nil)
      @ruby_buf = item ? [item] : [""]
      @protected = nil
      @char_type = nil
    end

    def empty?
      @ruby_buf.empty?
    end

    def present?
      !@ruby_buf.empty?
    end

    # @return [Array] the underlying array itself (not a copy)
    def to_a
      @ruby_buf
    end

    def each(&block)
      @ruby_buf.each(&block)
    end

    def last
      @ruby_buf.last
    end

    def push(item)
      @ruby_buf.push(item)
    end

    def length
      @ruby_buf.length
    end

    # Appends item to the last element in place.
    def last_concat(item)
      last.concat(item)
    end

    def last_is_string?
      last.is_a?(String)
    end

    # buffer management
    #
    # Moves the collected contents to the end of +buffer+ and resets this
    # buffer. When protected, RUBY_PREFIX is put in front first. If both the
    # first collected element and the last element of +buffer+ are strings,
    # they are merged in place rather than added as separate elements.
    #
    # @param buffer [Array] destination, modified in place
    # @return [Array] the same +buffer+
    def dump(buffer)
      if @protected
        @ruby_buf.unshift(RUBY_PREFIX)
        @protected = nil
      end
      head, *rest = @ruby_buf
      if head.is_a?(String) && buffer.last.is_a?(String)
        buffer.last.concat(head)
        buffer.concat(rest)
      else
        buffer.concat(@ruby_buf)
      end
      clear
      buffer
    end

  end
end
81
+
@@ -0,0 +1,27 @@
1
class Aozora2Html
  # Simple LIFO stack of style entries.
  class StyleStack
    def initialize
      @stack = []
    end

    # Pushes an entry onto the stack.
    def push(elem)
      @stack.push(elem)
    end

    # @return [Boolean] true when no entry is on the stack
    def empty?
      @stack.empty?
    end

    # Removes and returns the top entry, or nil when the stack is empty.
    def pop
      @stack.pop
    end

    # Returns the top entry without removing it, or nil when the stack is empty.
    def last
      @stack.last
    end

    # Returns the first element (index 0) of the top entry.
    #
    # Like #last and #pop, returns nil on an empty stack instead of failing
    # with NoMethodError on nil.
    def last_command
      top = @stack.last
      top && top[0]
    end
  end
end