aozora2html 0.7.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/rubocop.yml +19 -0
  3. data/.github/workflows/ruby.yml +37 -0
  4. data/.gitignore +8 -3
  5. data/.rubocop.yml +111 -0
  6. data/.rubocop_todo.yml +7 -0
  7. data/CHANGELOG.md +59 -0
  8. data/Gemfile +2 -0
  9. data/Guardfile +3 -1
  10. data/HACKING.md +45 -0
  11. data/LICENSE +116 -0
  12. data/README.md +29 -16
  13. data/Rakefile +12 -5
  14. data/aozora2html.gemspec +24 -24
  15. data/bin/aozora2html +25 -71
  16. data/lib/aozora2html/accent_parser.rb +99 -0
  17. data/lib/aozora2html/error.rb +17 -0
  18. data/lib/aozora2html/header.rb +141 -0
  19. data/lib/aozora2html/i18n.rb +48 -0
  20. data/lib/aozora2html/ruby_buffer.rb +116 -0
  21. data/lib/aozora2html/string_refinements.rb +36 -0
  22. data/lib/aozora2html/style_stack.rb +33 -0
  23. data/lib/aozora2html/tag/accent.rb +37 -0
  24. data/lib/aozora2html/tag/block.rb +21 -0
  25. data/lib/aozora2html/tag/chitsuki.rb +19 -0
  26. data/lib/aozora2html/tag/dakuten_katakana.rb +25 -0
  27. data/lib/aozora2html/tag/decorate.rb +19 -0
  28. data/lib/aozora2html/tag/dir.rb +17 -0
  29. data/lib/aozora2html/tag/editor_note.rb +19 -0
  30. data/lib/aozora2html/tag/embed_gaiji.rb +52 -0
  31. data/lib/aozora2html/tag/font_size.rb +20 -0
  32. data/lib/aozora2html/tag/gaiji.rb +12 -0
  33. data/lib/aozora2html/tag/img.rb +21 -0
  34. data/lib/aozora2html/tag/indent.rb +8 -0
  35. data/lib/aozora2html/tag/inline.rb +16 -0
  36. data/lib/aozora2html/tag/inline_caption.rb +17 -0
  37. data/lib/aozora2html/tag/inline_font_size.rb +19 -0
  38. data/lib/aozora2html/tag/inline_keigakomi.rb +17 -0
  39. data/lib/aozora2html/tag/inline_yokogumi.rb +17 -0
  40. data/lib/aozora2html/tag/jisage.rb +17 -0
  41. data/lib/aozora2html/tag/jizume.rb +19 -0
  42. data/lib/aozora2html/tag/kaeriten.rb +17 -0
  43. data/lib/aozora2html/tag/keigakomi.rb +19 -0
  44. data/lib/aozora2html/tag/kunten.rb +12 -0
  45. data/lib/aozora2html/tag/midashi.rb +20 -0
  46. data/lib/aozora2html/tag/multiline.rb +9 -0
  47. data/lib/aozora2html/tag/multiline_caption.rb +13 -0
  48. data/lib/aozora2html/tag/multiline_chitsuki.rb +10 -0
  49. data/lib/aozora2html/tag/multiline_jisage.rb +10 -0
  50. data/lib/aozora2html/tag/multiline_midashi.rb +25 -0
  51. data/lib/aozora2html/tag/multiline_style.rb +19 -0
  52. data/lib/aozora2html/tag/multiline_yokogumi.rb +14 -0
  53. data/lib/aozora2html/tag/okurigana.rb +17 -0
  54. data/lib/aozora2html/tag/oneline_chitsuki.rb +10 -0
  55. data/lib/aozora2html/tag/oneline_indent.rb +9 -0
  56. data/lib/aozora2html/tag/oneline_jisage.rb +10 -0
  57. data/lib/aozora2html/tag/reference_mentioned.rb +47 -0
  58. data/lib/aozora2html/tag/ruby.rb +202 -0
  59. data/lib/aozora2html/tag/un_embed_gaiji.rb +30 -0
  60. data/lib/aozora2html/tag.rb +57 -0
  61. data/lib/aozora2html/tag_parser.rb +60 -0
  62. data/lib/aozora2html/text_buffer.rb +50 -0
  63. data/lib/aozora2html/utils.rb +156 -0
  64. data/lib/aozora2html/version.rb +3 -1
  65. data/lib/aozora2html/yaml_loader.rb +37 -0
  66. data/lib/aozora2html/zip.rb +4 -0
  67. data/lib/aozora2html.rb +1359 -8
  68. data/lib/extensions.rb +12 -0
  69. data/lib/jstream.rb +139 -0
  70. data/sample/chukiichiran_kinyurei.html +15 -2
  71. data/sample/chukiichiran_kinyurei.txt +15 -2
  72. data/test/test_aozora2html.rb +323 -73
  73. data/test/test_aozora_accent_parser.rb +34 -6
  74. data/test/test_command_parse.rb +216 -0
  75. data/test/test_compat.rb +3 -4
  76. data/test/test_dakuten_katakana_tag.rb +12 -13
  77. data/test/test_decorate_tag.rb +11 -7
  78. data/test/test_dir_tag.rb +11 -7
  79. data/test/test_editor_note_tag.rb +9 -6
  80. data/test/test_exception.rb +11 -9
  81. data/test/test_font_size_tag.rb +22 -11
  82. data/test/test_gaiji_tag.rb +22 -14
  83. data/test/test_header.rb +45 -0
  84. data/test/test_helper.rb +3 -1
  85. data/test/test_i18n.rb +39 -0
  86. data/test/test_img_tag.rb +11 -6
  87. data/test/test_inline_caption_tag.rb +11 -7
  88. data/test/test_inline_font_size_tag.rb +15 -11
  89. data/test/test_inline_keigakomi_tag.rb +11 -7
  90. data/test/test_inline_yokogumi_tag.rb +11 -7
  91. data/test/test_jizume_tag.rb +11 -8
  92. data/test/test_jstream.rb +33 -30
  93. data/test/test_kaeriten_tag.rb +11 -7
  94. data/test/test_keigakomi_tag.rb +14 -11
  95. data/test/test_midashi_tag.rb +39 -0
  96. data/test/test_multiline_caption_tag.rb +11 -8
  97. data/test/test_multiline_midashi_tag.rb +26 -26
  98. data/test/test_multiline_style_tag.rb +11 -8
  99. data/test/test_multiline_yokogumi_tag.rb +11 -8
  100. data/test/test_okurigana_tag.rb +11 -7
  101. data/test/test_ruby_parse.rb +130 -0
  102. data/test/test_ruby_tag.rb +11 -7
  103. data/test/test_tag_parser.rb +31 -29
  104. data/vendor/jis2ucs/README.md +3 -6
  105. data/yml/accent_table.yml +240 -0
  106. data/yml/command_table.yml +61 -0
  107. data/yml/jis2ucs.yml +11234 -0
  108. metadata +99 -21
  109. data/.travis.yml +0 -12
  110. data/appveyor.yml +0 -23
  111. data/lib/accent_tag.rb +0 -23
  112. data/lib/aozora2html/jis2ucs.rb +0 -11237
  113. data/lib/embed_gaiji_tag.rb +0 -34
  114. data/lib/t2hs.rb +0 -2535
data/lib/aozora2html.rb CHANGED
@@ -1,10 +1,1361 @@
1
- require "aozora2html/version"
2
- require "aozora2html/zip"
3
- require "aozora2html/jis2ucs"
4
- require 't2hs'
5
- require 'embed_gaiji_tag'
6
- require 'accent_tag'
7
-
8
- ## already defined in t2hs.rb
1
+ require_relative 'aozora2html/version'
2
+ require_relative 'extensions'
3
+ require_relative 'aozora2html/error'
4
+ require_relative 'aozora2html/i18n'
5
+ require_relative 'jstream'
6
+ require_relative 'aozora2html/tag'
7
+ require_relative 'aozora2html/tag_parser'
8
+ require_relative 'aozora2html/accent_parser'
9
+ require_relative 'aozora2html/style_stack'
10
+ require_relative 'aozora2html/header'
11
+ require_relative 'aozora2html/ruby_buffer'
12
+ require_relative 'aozora2html/text_buffer'
13
+ require_relative 'aozora2html/yaml_loader'
14
+ require_relative 'aozora2html/utils'
15
+ require_relative 'aozora2html/string_refinements'
16
+
17
+ # 青空文庫形式のテキストファイルを html に整形する ruby スクリプト
18
+ # 変換器本体
9
19
  class Aozora2Html
20
+ # 全角バックスラッシュが出せないから直打ち
21
+ KU = ['18e5'].pack('h*').force_encoding('shift_jis')
22
+ NOJI = ['18f5'].pack('h*').force_encoding('shift_jis')
23
+ DAKUTEN = ['18d8'].pack('h*').force_encoding('shift_jis')
24
+
25
+ using StringRefinements
26
+
27
+ GAIJI_MARK = '※'.to_sjis
28
+ IGETA_MARK = '#'.to_sjis
29
+ RUBY_BEGIN_MARK = '《'.to_sjis
30
+ RUBY_END_MARK = '》'.to_sjis
31
+ PAREN_BEGIN_MARK = '('.to_sjis
32
+ PAREN_END_MARK = ')'.to_sjis
33
+ SIZE_SMALL = '小'.to_sjis
34
+ SIZE_MIDDLE = '中'.to_sjis
35
+ SIZE_LARGE = '大'.to_sjis
36
+ TEIHON_MARK = '底本:'.to_sjis
37
+ COMMAND_BEGIN = '['.to_sjis
38
+ COMMAND_END = ']'.to_sjis
39
+ ACCENT_BEGIN = '〔'.to_sjis
40
+ ACCENT_END = '〕'.to_sjis
41
+ AOZORABUNKO = '青空文庫'.to_sjis
42
+ # PAT_EDITOR = /[校訂|編|編集|編集校訂|校訂編集]$/
43
+ PAT_EDITOR = '(校訂|編|編集)$'.to_sjis
44
+ PAT_HENYAKU = '編訳$'.to_sjis
45
+ PAT_TRANSLATOR = '訳$'.to_sjis
46
+ RUBY_PREFIX = '|'.to_sjis
47
+ PAT_RUBY = /#{"《.*?》".to_sjis}/.freeze
48
+ PAT_DIRECTION = '(右|左|上|下)に(.*)'.to_sjis
49
+ PAT_REF = '^「.+」'.to_sjis
50
+ CHUUKI_COMMAND = '注記付き'.to_sjis
51
+ TCY_COMMAND = '縦中横'.to_sjis
52
+ KEIGAKOMI_COMMAND = '罫囲み'.to_sjis
53
+ YOKOGUMI_COMMAND = '横組み'.to_sjis
54
+ CAPTION_COMMAND = 'キャプション'.to_sjis
55
+ WARIGAKI_COMMAND = '割書'.to_sjis
56
+ KAERITEN_COMMAND = '返り点'.to_sjis
57
+ KUNTEN_OKURIGANA_COMMAND = '訓点送り仮名'.to_sjis
58
+ MIDASHI_COMMAND = '見出し'.to_sjis
59
+ OMIDASHI_COMMAND = '大見出し'.to_sjis
60
+ NAKAMIDASHI_COMMAND = '中見出し'.to_sjis
61
+ KOMIDASHI_COMMAND = '小見出し'.to_sjis
62
+ DOGYO_OMIDASHI_COMMAND = '同行大見出し'.to_sjis
63
+ DOGYO_NAKAMIDASHI_COMMAND = '同行中見出し'.to_sjis
64
+ DOGYO_KOMIDASHI_COMMAND = '同行小見出し'.to_sjis
65
+ MADO_OMIDASHI_COMMAND = '窓大見出し'.to_sjis
66
+ MADO_NAKAMIDASHI_COMMAND = '窓中見出し'.to_sjis
67
+ MADO_KOMIDASHI_COMMAND = '窓小見出し'.to_sjis
68
+ LEFT_MARK = '左'.to_sjis
69
+ UNDER_MARK = '下'.to_sjis
70
+ OVER_MARK = '上'.to_sjis
71
+ MAIN_MARK = '本文'.to_sjis
72
+ END_MARK = '終わり'.to_sjis
73
+ TEN_MARK = '点'.to_sjis
74
+ SEN_MARK = '線'.to_sjis
75
+ OPEN_MARK = 'ここから'.to_sjis
76
+ CLOSE_MARK = 'ここで'.to_sjis
77
+ MADE_MARK = 'まで'.to_sjis
78
+ DOGYO_MARK = '同行'.to_sjis
79
+ MADO_MARK = '窓'.to_sjis
80
+ JIAGE_COMMAND = '字上げ'.to_sjis
81
+ JISAGE_COMMAND = '字下げ'.to_sjis
82
+ PHOTO_COMMAND = '写真'.to_sjis
83
+ ORIKAESHI_COMMAND = '折り返して'.to_sjis
84
+ ONELINE_COMMAND = 'この行'.to_sjis
85
+ NON_0213_GAIJI = '非0213外字'.to_sjis
86
+ WARICHU_COMMAND = '割り注'.to_sjis
87
+ TENTSUKI_COMMAND = '天付き'.to_sjis
88
+ PAT_REST_NOTES = '(左|下)に「(.*)」の(ルビ|注記|傍記)'.to_sjis
89
+ PAT_KUTEN = /#{"「※」[は|の]".to_sjis}/.freeze
90
+ PAT_KUTEN_DUAL = '※.*※'.to_sjis
91
+ PAT_GAIJI = '(?:#)(.*)(?:、)(.*)'.to_sjis
92
+ PAT_KAERITEN = '^([一二三四五六七八九十レ上中下甲乙丙丁天地人]+)$'.to_sjis
93
+ PAT_OKURIGANA = '^((.+))$'.to_sjis
94
+ PAT_REMOVE_OKURIGANA = /#{"[()]".to_sjis}/.freeze
95
+ PAT_CHITSUKI = /#{"(地付き|字上げ)(終わり)*$".to_sjis}/.freeze
96
+ PAT_ORIKAESHI_JISAGE = '折り返して(\\d*)字下げ'.to_sjis
97
+ PAT_ORIKAESHI_JISAGE2 = '(\\d*)字下げ、折り返して(\\d*)字下げ'.to_sjis
98
+ PAT_JI_LEN = '([0-9]+)字'.to_sjis
99
+ PAT_INLINE_RUBY = '「(.*)」の注記付き'.to_sjis
100
+ PAT_IMAGE = '(.*)((fig.+\\.png)(、横([0-9]+)×縦([0-9]+))*)入る'.to_sjis
101
+ PAT_FRONTREF = '「([^「」]*(?:「.+」)*[^「」]*)」[にはの](「.+」の)*(.+)'.to_sjis
102
+ PAT_RUBY_DIR = '(左|下)に「([^」]*)」の(ルビ|注記)'.to_sjis
103
+ PAT_CHUUKI = /#{"「(.+?)」の注記".to_sjis}/.freeze
104
+ PAT_BOUKI = /#{"「(.)」の傍記".to_sjis}/.freeze
105
+ PAT_CHARSIZE = /#{"(.*)段階(..)な文字".to_sjis}/.freeze
106
+
107
+ REGEX_HIRAGANA = Regexp.new('[ぁ-んゝゞ]'.to_sjis)
108
+ REGEX_KATAKANA = Regexp.new('[ァ-ンーヽヾヴ]'.to_sjis)
109
+ REGEX_ZENKAKU = Regexp.new('[0-9A-Za-zΑ-Ωα-ωА-Яа-я−&’,.]'.to_sjis)
110
+ REGEX_HANKAKU = Regexp.new("[A-Za-z0-9#\\-\\&'\\,]".to_sjis)
111
+ REGEX_KANJI = Regexp.new('[亜-熙々※仝〆〇ヶ]'.to_sjis)
112
+
113
+ DYNAMIC_CONTENTS = "<div id=\"card\">\r\n<hr />\r\n<br />\r\n<a href=\"JavaScript:goLibCard();\" id=\"goAZLibCard\">●図書カード</a><script type=\"text/javascript\" src=\"../../contents.js\"></script>\r\n<script type=\"text/javascript\" src=\"../../golibcard.js\"></script>\r\n</div>".to_sjis
114
+
115
+ # KUNOJI = ["18e518f5"].pack("h*")
116
+ # utf8 ["fecbf8fecbcb"].pack("h*")
117
+ # DAKUTENKUNOJI = ["18e518d818f5"].pack("h*")
118
+ # utf8 ["fecbf82e083bfecbcb"].pack("h*")
119
+
120
+ loader = Aozora2Html::YamlLoader.new(File.dirname(__FILE__))
121
+ ACCENT_TABLE = loader.load('../yml/accent_table.yml')
122
+
123
+ # [class, tag]
124
+ COMMAND_TABLE = loader.load('../yml/command_table.yml')
125
+ JIS2UCS = loader.load('../yml/jis2ucs.yml')
126
+
127
+ INDENT_TYPE = {
128
+ jisage: '字下げ'.to_sjis,
129
+ chitsuki: '地付き'.to_sjis,
130
+ midashi: '見出し'.to_sjis,
131
+ jizume: '字詰め'.to_sjis,
132
+ yokogumi: '横組み'.to_sjis,
133
+ keigakomi: '罫囲み'.to_sjis,
134
+ caption: 'キャプション'.to_sjis,
135
+ futoji: '太字'.to_sjis,
136
+ shatai: '斜体'.to_sjis,
137
+ dai: '大きな文字'.to_sjis,
138
+ sho: '小さな文字'.to_sjis
139
+ }.freeze
140
+
141
+ DAKUTEN_KATAKANA_TABLE = {
142
+ '2' => 'ワ゛'.to_sjis,
143
+ '3' => 'ヰ゛'.to_sjis,
144
+ '4' => 'ヱ゛'.to_sjis,
145
+ '5' => 'ヲ゛'.to_sjis
146
+ }.freeze
147
+
148
# Sets up the converter state.
#
# @param input [#read, String] a readable object, or a path that is opened as binary Shift_JIS
# @param output [#print, String] a printable object, or a path that is opened for writing
# @param gaiji_dir [String, nil] directory used for gaiji images (defaults to '../../../gaiji/')
# @param css_files [Array<String>, nil] stylesheets handed to the header (defaults to ['../../aozora.css'])
def initialize(input, output, gaiji_dir: nil, css_files: nil)
  # Anything that responds to #read is wrapped as-is; otherwise treat it as a path.
  source = input.respond_to?(:read) ? input : File.open(input, 'rb:Shift_JIS')
  @stream = Jstream.new(source)
  # Anything that responds to #print is used as-is; otherwise treat it as a path.
  @out = output.respond_to?(:print) ? output : File.open(output, 'w')
  @gaiji_dir = gaiji_dir || '../../../gaiji/'
  @css_files = css_files || ['../../aozora.css']

  @buffer = TextBuffer.new
  @ruby_buf = RubyBuffer.new
  # Section currently being processed (:head, :head_end, :chuuki, :chuuki_in, :body, :tail)
  @section = :head
  # Collected header lines
  @header = Aozora2Html::Header.new(css_files: @css_files)
  # Stack of open inline styles
  @style_stack = StyleStack.new
  # Remembers which notes have to be emitted at the end
  @chuuki_table = {}
  # Gaiji images that were used
  @images = []
  # Usually symbols; for a hanging indent the div tag string is stored instead
  @indent_stack = []
  @tag_stack = []
  # Heading counter; the increment depends on the kind of heading
  @midashi_id = 0
  # Line-break control (the original author wonders whether "terpri" comes from Lisp)
  @terprip = true
  # Character that ends parsing; the original notes it differs in AccentParser and TagParser
  @endchar = :eof
  # Flag: emit nothing when the end of the line is read
  @noprint = nil
end
176
+
177
+ def line_number # line number currently reported by the input stream (@stream.line)
178
+ @stream.line
179
+ end
180
+
181
+ def block_allowed_context? # true when @style_stack is empty
181
+ # It is enough to check that no inline tag is still open
182
+ @style_stack.empty?
183
+ end
185
+
186
# Runs the whole conversion: parse the input, emit the trailer and close the streams.
#
# The parse loop is left with `throw :terminate` once the end of the input is reached.
# An Aozora2Html::Error raised while parsing is reported on stdout together with the
# current line number and the process exits with status 2. Any other StandardError is
# announced with the line number and then re-raised to the caller.
#
def process
  catch(:terminate) do
    parse
  rescue Aozora2Html::Error => e
    # The rescue clause already restricts `e` to Aozora2Html::Error,
    # so no further class check is needed before exiting.
    puts e.message(line_number)
    exit(2)
  end
  tail_output # final call
  finalize
  close
rescue StandardError
  puts "ERROR: line: #{line_number}"
  raise
end
206
+
207
# Advances the heading counter and returns its new value.
#
# @param size [Integer, String] an explicit increment, or a heading name that is matched
#   against SIZE_SMALL (+1), SIZE_MIDDLE (+10) and SIZE_LARGE (+100)
# @return [Integer] the updated @midashi_id
# @raise [Aozora2Html::Error] when a non-integer size matches none of the patterns
def new_midashi_id(size)
  step = case size
         when Integer then size
         when /#{SIZE_SMALL}/o then 1
         when /#{SIZE_MIDDLE}/o then 10
         when /#{SIZE_LARGE}/o then 100
         else raise Aozora2Html::Error, I18n.t(:undefined_header)
         end
  @midashi_id += step
end
225
+
226
# Tries to turn a gaiji annotation into an embedded-gaiji tag.
#
# PAT_KUTEN is stripped from the text, then a code of the form "N-NN-NN" (first field 1 or 2)
# is searched for. The tag is only built when such a code exists and the text matches neither
# NON_0213_GAIJI nor PAT_KUTEN_DUAL; in that case @chuuki_table[:newjis] is set as well.
#
# @param substring [String] annotation text
# @return [Aozora2Html::Tag::EmbedGaiji, String] the tag, or +substring+ itself when no usable code is found
def kuten2png(substring)
  desc = substring.gsub(PAT_KUTEN, '')
  matched = desc.match(/[12]-\d{1,2}-\d{1,2}/)
  return substring unless matched && !desc.match?(NON_0213_GAIJI) && !desc.match?(PAT_KUTEN_DUAL)

  @chuuki_table[:newjis] = true
  # Convert explicitly as decimal: handing the strings to sprintf('%d') lets a leading zero
  # select another radix, so "08" or "09" would raise ArgumentError.
  codes = matched[0].split('-').map(&:to_i)
  folder = sprintf('%1d-%02d', codes[0], codes[1])
  code = sprintf('%1d-%02d-%02d', *codes)
  # Non-bang gsub: gsub! returns nil when the text contains no IGETA_MARK,
  # which would hand nil to the tag as its description.
  Aozora2Html::Tag::EmbedGaiji.new(self, folder, code, desc.gsub(IGETA_MARK, ''), gaiji_dir: @gaiji_dir)
end
239
+
240
# Extracts the mode symbol from a command string.
#
# A command matching the chitsuki or jiage name followed by END_MARK is reported as :chitsuki;
# otherwise the first INDENT_TYPE entry whose text matches the command wins.
#
# @return [Symbol, nil] a key of INDENT_TYPE, or nil when nothing matches
#
def detect_command_mode(command)
  closers = [INDENT_TYPE[:chitsuki] + END_MARK, JIAGE_COMMAND + END_MARK]
  return :chitsuki if closers.any? { |pattern| command.match?(pattern) }

  INDENT_TYPE.each_key.find { |key| command.match?(INDENT_TYPE[key]) }
end
256
+
257
+ private
258
+
259
+ # Read one character (delegates to @stream.read_char)
260
+ def read_char
261
+ @stream.read_char
262
+ end
263
+
264
+ # Read one line (delegates to @stream.read_line)
265
+ def read_line
266
+ @stream.read_line
267
+ end
268
+
269
# Hands the stream to an AccentParser that stops at ACCENT_END and returns what it produces.
# The parser shares @chuuki_table and @images with this converter.
def read_accent
  parser = Aozora2Html::AccentParser.new(@stream, ACCENT_END, @chuuki_table, @images, gaiji_dir: @gaiji_dir)
  parser.process
end
272
+
273
# Hands the stream to a TagParser that stops at +endchar+ and returns what it produces.
# The parser shares @chuuki_table and @images with this converter.
def read_to_nest(endchar)
  parser = Aozora2Html::TagParser.new(@stream, endchar, @chuuki_table, @images, gaiji_dir: @gaiji_dir)
  parser.process
end
276
+
277
+ def finalize # writes the closing part of the document
278
+ hyoki
279
+ dynamic_contents
280
+ @out.print("</body>\r\n</html>\r\n") # closes the HTML document
281
+ end
282
+
283
+ def dynamic_contents # prints the fixed DYNAMIC_CONTENTS markup to the output
284
+ @out.print DYNAMIC_CONTENTS
285
+ end
286
+
287
# Closes the input stream and the output.
#
# The output is closed from an ensure clause so that it is released even when
# closing the input stream raises; the error still propagates to the caller.
def close
  @stream.close
ensure
  @out.close
end
291
+
292
# Converts a notation symbol into its display string.
# When the symbol is not a key of INDENT_TYPE the argument is returned unchanged.
def convert_indent_type(type)
  INDENT_TYPE.fetch(type, type)
end
297
+
298
# Compares +type+ with the innermost open block on @indent_stack.
#
# A String on top of the stack is treated as :jisage, and @noprint is switched on in that case.
#
# @return [nil, Object] nil when the types agree, otherwise convert_indent_type(type)
def check_close_match(type)
  current = @indent_stack.last
  if current.is_a?(String)
    @noprint = true
    current = :jisage
  end
  current == type ? nil : convert_indent_type(type)
end
311
+
312
# Closes the innermost open block when a block of the same +type+ is about to start.
#
# Nothing happens when no block is open, or when check_close_match reports a different
# type (nested multiline tags are allowed to stay open). Otherwise the block is popped
# from @indent_stack and its closing tag, if any, is pushed into the buffers.
#
# @return [Object, nil] the result of push_chars when a closing tag was pushed, else nil
def implicit_close(type)
  return unless @indent_stack.last
  # ok, nested multiline tags, go ahead
  return if check_close_match(type)

  # not nested, please close
  @indent_stack.pop
  closing = @tag_stack.pop
  push_chars(closing) if closing
end
326
+
327
# Checks that the body may end here; raises when a block is still open.
#
# @raise [Aozora2Html::Error] when @indent_stack still holds an entry
def ensure_close
  open_block = @indent_stack.last
  raise Aozora2Html::Error, I18n.t(:terminate_in_style, convert_indent_type(open_block)) if open_block
end
334
+
335
# Handles an explicit closing command for a block of +type+.
#
# Pops one entry from @tag_stack and pushes it into the buffers when present.
# The caller is responsible for popping @indent_stack.
#
# @raise [Aozora2Html::Error] when check_close_match reports a mismatch with the open block
def explicit_close(type)
  mismatch = check_close_match(type)
  raise Aozora2Html::Error, I18n.t(:invalid_closing, mismatch, mismatch) if mismatch

  closing = @tag_stack.pop
  push_chars(closing) if closing
end
346
+
347
# Main loop: repeatedly runs the handler that belongs to the current @section.
# The loop itself never ends; it is left through an exception or a throw from a handler.
#
# @raise [Aozora2Html::Error] when @section holds an unknown value
def parse
  handlers = {
    head: :parse_header,
    head_end: :judge_chuuki,
    chuuki: :parse_chuuki,
    chuuki_in: :parse_chuuki,
    body: :parse_body,
    tail: :parse_tail
  }
  loop do
    handler = handlers[@section]
    raise Aozora2Html::Error, 'encount undefined condition' unless handler

    send(handler)
  end
end
366
+
367
# Decides whether a notes section follows the header, by peeking at the stream.
#
# Leading '-' characters are counted. If they are followed by a line break the next section
# is :chuuki (or :body when there was no '-' at all). Any other character means the body
# starts right away, and a "<br />" line is printed first.
def judge_chuuki
  dashes = 0
  upcoming = @stream.peek_char(dashes)
  while upcoming == '-'
    dashes += 1
    upcoming = @stream.peek_char(dashes)
  end

  if upcoming == "\r\n"
    @section = dashes.zero? ? :body : :chuuki
  else
    @section = :body
    @out.print("<br />\r\n")
  end
  nil
end
388
+
389
# The header is read one line at a time.
#
# Non-empty lines are stored in @header after RUBY_PREFIX and PAT_RUBY matches are removed.
# An empty line ends the header (refinement from Tomita, 09/06/14): the section becomes
# :head_end and the header HTML is printed.
def parse_header
  line = read_line
  unless line == ''
    line.gsub!(RUBY_PREFIX, '')
    line.gsub!(PAT_RUBY, '')
    return @header.push(line)
  end

  @section = :head_end
  @out.print(@header.to_html)
end
402
+
403
# Consumes one line of the notes section.
#
# Only a line consisting solely of '-' changes state: :chuuki becomes :chuuki_in,
# and :chuuki_in becomes :body. Every other line is skipped.
def parse_chuuki
  return unless read_line.match?(/^-+$/)

  if @section == :chuuki
    @section = :chuuki_in
  elsif @section == :chuuki_in
    @section = :body
  end
end
414
+
415
+ # Body parser
416
+ #
417
+ # Reads one character, dispatches on it and stores the result into @buffer/@ruby_buf
418
+ # When a line break is reached, everything accumulated so far is flushed with general_output
419
+ #
420
+ def parse_body
421
+ char = read_char
422
+ check = true # when true, an ordinary character is passed to Utils.illegal_char_check below
423
+ case char
424
+ when ACCENT_BEGIN
425
+ check = false
426
+ char = read_accent
427
+ when TEIHON_MARK[0]
428
+ if @buffer.length == 0
429
+ ending_check
430
+ end
431
+ when GAIJI_MARK
432
+ char = dispatch_gaiji
433
+ when COMMAND_BEGIN
434
+ char = dispatch_aozora_command
435
+ when KU
436
+ assign_kunoji
437
+ when RUBY_BEGIN_MARK
438
+ char = apply_ruby
439
+ end
440
+
441
+ case char # second pass: char may have been replaced by one of the handlers above
442
+ when "\r\n"
443
+ general_output
444
+ when RUBY_PREFIX
445
+ @ruby_buf.dump_into(@buffer)
446
+ @ruby_buf.protected = true
447
+ when @endchar
448
+ # suddenly finished the file
449
+ puts I18n.t(:warn_unexpected_terminator, line_number)
450
+ throw :terminate
451
+ when nil
452
+ # noop
453
+ else
454
+ if check
455
+ Utils.illegal_char_check(char, line_number)
456
+ end
457
+ push_chars(escape_special_chars(char))
458
+ end
459
+ end
460
+
461
# Checks whether the body has ended.
#
# Called after the first character of TEIHON_MARK has been read: when the next two
# characters in the stream complete the mark, the section switches to the footer (:tail),
# open blocks are verified with ensure_close and the bibliographical div is opened.
def ending_check
  follows_mark = @stream.peek_char(0) == TEIHON_MARK[1] && @stream.peek_char(1) == TEIHON_MARK[2]
  return unless follows_mark

  @section = :tail
  ensure_close
  @out.print("</div>\r\n<div class=\"bibliographical_information\">\r\n<hr />\r\n<br />\r\n")
end
472
+
473
# Pushes +obj+ into the buffers through push_char.
#
# Arrays are walked recursively, Strings are pushed character by character,
# and anything else is pushed as a single element.
def push_chars(obj)
  if obj.is_a?(Array)
    obj.each { |item| push_chars(item) }
  elsif obj.is_a?(String)
    obj.each_char { |ch| push_char(ch) }
  else
    push_char(obj)
  end
end
487
+
488
+ def push_char(char) # forwards one element to @ruby_buf, passing @buffer along
489
+ @ruby_buf.push_char(char, @buffer)
490
+ end
491
+
492
+ # Outputs the line that has been read
493
+ #
494
+ # Called when the parser has read a line-break character.
495
+ # @ruby_buf is dumped into @buffer and @buffer is replaced by a fresh TextBuffer
496
+ #
497
+ # @return [void]
498
+ #
499
+ def general_output
500
+ if @style_stack.last
501
+ raise Aozora2Html::Error, I18n.t(:dont_crlf_in_style, @style_stack.last_command)
502
+ end
503
+
504
+ # Do not emit a line break when the buffer held nothing but an indent tag!
505
+ if @noprint
506
+ @noprint = false
507
+ return
508
+ end
509
+ @ruby_buf.dump_into(@buffer)
510
+ buf = @buffer
511
+ @buffer = TextBuffer.new
512
+ tail = [] # closing tags of one-line indents, printed after the line content
513
+
514
+ indent_type = buf.blank_type
515
+ terprip = buf.terpri? && @terprip
516
+ @terprip = true
517
+
518
+ if @indent_stack.last.is_a?(String) && !indent_type
519
+ @out.print @indent_stack.last
520
+ end
521
+
522
+ buf.each do |s|
523
+ if s.is_a?(Aozora2Html::Tag::OnelineIndent)
524
+ tail.unshift(s.close_tag)
525
+ elsif s.is_a?(Aozora2Html::Tag::UnEmbedGaiji) && !s.escaped?
526
+ # bring back the gaiji mark that had been removed
527
+ @out.print GAIJI_MARK
528
+ end
529
+ @out.print s.to_s
530
+ end
531
+
532
+ # finish the line with CRLF
533
+ if @indent_stack.last.is_a?(String)
534
+ # hanging indent
535
+ # tail always active
536
+ @out.print tail.map(&:to_s).join
537
+ if indent_type == :inline
538
+ @out.print "\r\n"
539
+ elsif indent_type && terprip
540
+ @out.print "<br />\r\n"
541
+ else
542
+ @out.print "</div>\r\n"
543
+ end
544
+ elsif tail.empty? && terprip
545
+ @out.print "<br />\r\n"
546
+ else
547
+ @out.print tail.map(&:to_s).join
548
+ @out.print "\r\n"
549
+ end
550
+ end
551
+
552
+ # Finds a front reference; returned as a list of elements because ruby, layered styles etc. may apply
553
+ #
554
+ # A front reference is notation such as `○○[#「○○」に傍点]` or `吹喋[#「喋」に「ママ」の注記]`
555
+ #
556
+ # @return [TextBuffer|false] (nil is also possible when neither inner match branch applies)
557
+ def search_front_reference(string)
558
+ if string.length == 0
559
+ return false
560
+ end
561
+
562
+ searching_buf = if @ruby_buf.present?
563
+ @ruby_buf.to_a
564
+ else
565
+ @buffer
566
+ end
567
+ last_string = searching_buf.last
568
+ case last_string
569
+ when String
570
+ if last_string == ''
571
+ searching_buf.pop
572
+ search_front_reference(string)
573
+ elsif last_string.match?(Regexp.new("#{Regexp.quote(string)}$"))
574
+ # exact match: the last buffer element ends with the whole target
575
+ # start = match.begin(0)
576
+ # tail = match.end(0)
577
+ # last_string[start,tail-start] = ""
578
+ searching_buf.pop
579
+ searching_buf.push(last_string.sub(Regexp.new("#{Regexp.quote(string)}$"), ''))
580
+ TextBuffer.new([string])
581
+ elsif string.match?(Regexp.new("#{Regexp.quote(last_string)}$"))
582
+ # partial match: the target ends with the last buffer element, keep searching further back
583
+ tmp = searching_buf.pop
584
+ found = search_front_reference(string.sub(Regexp.new("#{Regexp.quote(last_string)}$"), ''))
585
+ if found
586
+ found.push(tmp)
587
+ found
588
+ else
589
+ searching_buf.push(tmp) # put the element back when the rest was not found
590
+ false
591
+ end
592
+ end
593
+ when Aozora2Html::Tag::ReferenceMentioned
594
+ inner = last_string.target_string
595
+ if inner == string
596
+ # exact match
597
+ searching_buf.pop
598
+ TextBuffer.new([last_string])
599
+ elsif string.match?(Regexp.new("#{Regexp.quote(inner)}$"))
600
+ # partial match
601
+ tmp = searching_buf.pop
602
+ found = search_front_reference(string.sub(Regexp.new("#{Regexp.quote(inner)}$"), ''))
603
+ if found
604
+ found.push(tmp)
605
+ found
606
+ else
607
+ searching_buf.push(tmp) # put the element back when the rest was not found
608
+ false
609
+ end
610
+ end
611
+ else
612
+ false
613
+ end
614
+ end
615
+
616
# Puts a front reference that was found back into the buffers.
#
# Each element goes to @buffer only when @ruby_buf is not present and the last element of
# @buffer is a String: String elements are concatenated onto it, other elements are pushed.
# In every other case the element is pushed to @ruby_buf.
#
# @return [void]
#
def recovery_front_reference(reference)
  reference.each do |elt|
    # if @ruby_buf.protected
    if !@ruby_buf.present? && @buffer.last.is_a?(String)
      if elt.is_a?(String)
        @buffer.last.concat(elt)
      else
        @buffer.push(elt)
      end
    else
      @ruby_buf.push(elt)
    end
  end
end
639
+
640
# Registers a gaiji that is not embedded and returns an UnEmbedGaiji tag for it.
#
# The two captures of PAT_GAIJI are recorded in @images: the second capture is appended to
# the existing entry for the first capture, or a new [kanji, line] entry is added.
def escape_gaiji(command)
  kanji, line = command.match(PAT_GAIJI)&.captures
  entry = @images.assoc(kanji)
  if entry
    entry.push(line)
  else
    @images.push([kanji, line])
  end
  Aozora2Html::Tag::UnEmbedGaiji.new(self, command)
end
650
+
651
# Handles the character(s) following a gaiji mark.
#
# @return [String, Object] GAIJI_MARK itself when no command follows, otherwise the tag
#   produced by kuten2png, an EmbedGaiji built from a "U+XXXX" code, or escape_gaiji
def dispatch_gaiji
  # Not a gaiji unless the mark is followed by COMMAND_BEGIN
  return GAIJI_MARK unless @stream.peek_char(0) == COMMAND_BEGIN

  # Discard the COMMAND_BEGIN character
  read_char
  # embed?
  command, _raw = read_to_nest(COMMAND_END)
  embedded = kuten2png(command)
  return embedded unless embedded == command

  unicode = command.match(/U\+([0-9A-F]{4,5})/)
  # Unemb
  return escape_gaiji(command) unless unicode && Aozora2Html::Tag::EmbedGaiji.use_unicode

  Aozora2Html::Tag::EmbedGaiji.new(self, nil, nil, command, unicode[1], gaiji_dir: @gaiji_dir)
end
675
+
676
+ # Case analysis for annotation notation
677
+ def dispatch_aozora_command
678
+ # Not an annotation unless COMMAND_BEGIN is followed by IGETA_MARK
679
+ if @stream.peek_char(0) != IGETA_MARK
680
+ return COMMAND_BEGIN
681
+ end
682
+
683
+ # Discard the IGETA_MARK character
684
+ _ = read_char
685
+ command, raw = read_to_nest(COMMAND_END)
686
+ # Is this order of application really right? The original author worries about false matches
687
+ if command.match?(ORIKAESHI_COMMAND)
688
+ apply_burasage(command)
689
+
690
+ elsif command.start_with?(OPEN_MARK)
691
+ exec_block_start_command(command)
692
+ elsif command.start_with?(CLOSE_MARK)
693
+ exec_block_end_command(command)
694
+
695
+ elsif command.match?(WARICHU_COMMAND)
696
+ apply_warichu(command)
697
+ elsif command.match?(JISAGE_COMMAND)
698
+ apply_jisage(command)
699
+ elsif command.match?(/fig(\d)+_(\d)+\.png/)
700
+ exec_img_command(command, raw)
701
+ # avoid to try complex ruby -- escape to notes
702
+ elsif command.match?(PAT_REST_NOTES)
703
+ apply_rest_notes(command)
704
+ elsif command.end_with?(END_MARK)
705
+ exec_inline_end_command(command)
706
+ nil
707
+ elsif command.match?(PAT_REF)
708
+ exec_frontref_command(command)
709
+ elsif command.match?(/1-7-8[2345]/)
710
+ apply_dakuten_katakana(command)
711
+ elsif command.match?(PAT_KAERITEN)
712
+ Aozora2Html::Tag::Kaeriten.new(self, command)
713
+ elsif command.match?(PAT_OKURIGANA)
714
+ Aozora2Html::Tag::Okurigana.new(self, command.gsub!(PAT_REMOVE_OKURIGANA, ''))
715
+ elsif command.match?(PAT_CHITSUKI)
716
+ apply_chitsuki(command)
717
+ elsif exec_inline_start_command(command) # truthy result means an inline style was opened
718
+ nil
719
+ else # rubocop:disable Lint/DuplicateBranch
720
+ apply_rest_notes(command)
721
+ end
722
+ end
723
+
724
# Starts a hanging-indent ("burasage") block.
#
# A previously open block is closed implicitly; when that produced output the pending line
# is flushed without a line break. The opening div is stored as a String on @indent_stack
# (with a dummy entry on @tag_stack) and nothing is returned for the buffer.
#
# @return [nil]
def apply_burasage(command)
  if implicit_close(:jisage)
    @terprip = false
    general_output
  end
  @noprint = true # always no print
  command = Utils.convert_japanese_number(command)
  tag = if command.match?(TENTSUKI_COMMAND)
          width = command.match(PAT_ORIKAESHI_JISAGE)[1]
          "<div class=\"burasage\" style=\"margin-left: #{width}em; text-indent: -#{width}em;\">"
        else
          first_line, wrapped = command.match(PAT_ORIKAESHI_JISAGE2).to_a[1, 2]
          offset = first_line.to_i - wrapped.to_i
          "<div class=\"burasage\" style=\"margin-left: #{wrapped}em; text-indent: #{offset}em;\">"
        end
  @indent_stack.push(tag)
  @tag_stack.push('') # dummy
  nil
end
745
+
746
# Returns the digits that directly precede JISAGE_COMMAND in +command+,
# after the command has gone through Utils.convert_japanese_number.
def jisage_width(command)
  normalized = Utils.convert_japanese_number(command)
  normalized.match(/(\d*)(?:#{JISAGE_COMMAND})/o)[1]
end
749
+
750
# Applies an indent ("jisage") command.
#
# * A command containing MADE_MARK or END_MARK closes the open indent block.
# * A command that stands alone on its line (empty @buffer, line break next) and is not
#   marked with ONELINE_COMMAND opens a multi-line block.
# * Everything else indents just the current line by putting a tag at the front of @buffer.
#
# @return [Aozora2Html::Tag::MultilineJisage, nil] the block tag, or nil in the other cases
def apply_jisage(command)
  if command.match?(MADE_MARK) || command.match?(END_MARK)
    # end of indent
    explicit_close(:jisage)
    @indent_stack.pop
    return nil
  end

  # command only
  starts_block = !command.match?(ONELINE_COMMAND) &&
                 @buffer.length == 0 &&
                 @stream.peek_char(0) == "\r\n"
  unless starts_block
    # this line only
    @buffer.unshift(Aozora2Html::Tag::OnelineJisage.new(self, jisage_width(command)))
    return nil
  end

  @terprip = false
  implicit_close(:jisage)
  # adhook hack
  @noprint = false
  @indent_stack.push(:jisage)
  Aozora2Html::Tag::MultilineJisage.new(self, jisage_width(command))
end
773
+
774
# Opens or closes a "warichu" span.
#
# On closing, PAREN_END_MARK is added unless it is the next character in the stream.
# On opening, PAREN_BEGIN_MARK is added after the span unless the last element of
# @ruby_buf already ends with it.
#
# @return [nil]
def apply_warichu(command)
  if command.match?(END_MARK)
    push_char(PAREN_END_MARK) unless @stream.peek_char(0) == PAREN_END_MARK
    push_char('</span>')
  else
    # NOTE: decide before pushing anything, exactly as the original does.
    preceding = @ruby_buf.last
    paren_present = preceding.is_a?(String) && preceding.end_with?(PAREN_BEGIN_MARK)
    push_char('<span class="warichu">')
    push_char(PAREN_BEGIN_MARK) unless paren_present
  end
  nil
end
795
+
796
# Returns the number captured by PAT_JI_LEN in +command+ (after
# Utils.convert_japanese_number), or '0' when the pattern does not match.
#
# @return [String]
def chitsuki_length(command)
  found = Utils.convert_japanese_number(command).match(PAT_JI_LEN)
  found ? found[1] : '0'
end
805
+
806
# Applies a "chitsuki"/"jiage" command.
#
# @param string [String] the command text
# @param multiline [Boolean] true for a block spanning several lines
# @return [Object, nil] a MultilineChitsuki or OnelineChitsuki tag, or nil when the
#   command closes an open block
def apply_chitsuki(string, multiline: false)
  closing = string.match?(CLOSE_MARK + INDENT_TYPE[:chitsuki] + END_MARK) ||
            string.match?(CLOSE_MARK + JIAGE_COMMAND + END_MARK)
  if closing
    explicit_close(:chitsuki)
    @indent_stack.pop
    return nil
  end

  len = chitsuki_length(string)
  # one line only
  return Aozora2Html::Tag::OnelineChitsuki.new(self, len) unless multiline

  # several lines
  implicit_close(:chitsuki)
  @indent_stack.push(:chitsuki)
  Aozora2Html::Tag::MultilineChitsuki.new(self, len)
end
825
+
826
# Opens a multi-line heading block.
#
# The heading type is :dogyo or :mado when the command contains the respective mark,
# otherwise :normal; only the :normal type switches @terprip off.
def apply_midashi(command)
  @indent_stack.push(:midashi)
  midashi_type = if command.match?(DOGYO_MARK)
                   :dogyo
                 elsif command.match?(MADO_MARK)
                   :mado
                 else
                   :normal
                 end
  @terprip = false if midashi_type == :normal
  Aozora2Html::Tag::MultilineMidashi.new(self, command, midashi_type)
end
838
+
839
# Opens a multi-line "yokogumi" block: records it on @indent_stack, then builds the tag.
# The command text itself is not used.
def apply_yokogumi(_command)
  @indent_stack << :yokogumi
  Aozora2Html::Tag::MultilineYokogumi.new(self)
end
843
+
844
# Opens a "keigakomi" block: records it on @indent_stack, then builds the tag.
# The command text itself is not used.
def apply_keigakomi(_command)
  @indent_stack << :keigakomi
  Aozora2Html::Tag::Keigakomi.new(self)
end
848
+
849
# Opens a multi-line caption block: records it on @indent_stack, then builds the tag.
# The command text itself is not used.
def apply_caption(_command)
  @indent_stack << :caption
  Aozora2Html::Tag::MultilineCaption.new(self)
end
853
+
854
# Opens a "jizume" block whose width is the run of digits directly preceding
# INDENT_TYPE[:jizume] in the command (after Utils.convert_japanese_number).
def apply_jizume(command)
  normalized = Utils.convert_japanese_number(command)
  width = normalized.match(/(\d*)(?:#{INDENT_TYPE[:jizume]})/)[1]
  @indent_stack.push(:jizume)
  Aozora2Html::Tag::Jizume.new(self, width)
end
859
+
860
+ def push_block_tag(tag, closing) # pushes the tag and appends its close_tag to closing (mutated in place)
861
+ push_char(tag)
862
+ closing.concat(tag.close_tag) # NOTE(review): type of closing is not visible here (String or Array?) - confirm
863
+ end
864
+
865
# Classifies a size style: :sho when the text contains the "small" character, otherwise :dai.
#
# @return [Symbol] :sho or :dai
def detect_style_size(style)
  style.match?('小'.to_sjis) ? :sho : :dai
end
872
+
873
+ def exec_inline_start_command(command) # opens an inline style; returns the push_char result, or nil for an unknown command
874
+ case command
875
+ when CHUUKI_COMMAND # every branch records [command, closing tag] on @style_stack, then emits the opening tag
876
+ @style_stack.push([command, '</ruby>'])
877
+ push_char('<ruby><rb>')
878
+ when TCY_COMMAND
879
+ @style_stack.push([command, '</span>'])
880
+ push_char('<span dir="ltr">')
881
+ when KEIGAKOMI_COMMAND
882
+ @style_stack.push([command, '</span>'])
883
+ push_char('<span class="keigakomi">')
884
+ when YOKOGUMI_COMMAND
885
+ @style_stack.push([command, '</span>'])
886
+ push_char('<span class="yokogumi">')
887
+ when CAPTION_COMMAND
888
+ @style_stack.push([command, '</span>'])
889
+ push_char('<span class="caption">')
890
+ when WARIGAKI_COMMAND
891
+ @style_stack.push([command, '</span>'])
892
+ push_char('<span class="warigaki">')
893
+ when OMIDASHI_COMMAND # plain headings also switch @terprip off; the dogyo/mado variants below do not
894
+ @style_stack.push([command, '</a></h3>'])
895
+ @terprip = false
896
+ push_char("<h3 class=\"o-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(100)}\">")
897
+ when NAKAMIDASHI_COMMAND
898
+ @style_stack.push([command, '</a></h4>'])
899
+ @terprip = false
900
+ push_char("<h4 class=\"naka-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(10)}\">")
901
+ when KOMIDASHI_COMMAND
902
+ @style_stack.push([command, '</a></h5>'])
903
+ @terprip = false
904
+ push_char("<h5 class=\"ko-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(1)}\">")
905
+ when DOGYO_OMIDASHI_COMMAND
906
+ @style_stack.push([command, '</a></h3>'])
907
+ push_char("<h3 class=\"dogyo-o-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(100)}\">")
908
+ when DOGYO_NAKAMIDASHI_COMMAND
909
+ @style_stack.push([command, '</a></h4>'])
910
+ push_char("<h4 class=\"dogyo-naka-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(10)}\">")
911
+ when DOGYO_KOMIDASHI_COMMAND
912
+ @style_stack.push([command, '</a></h5>'])
913
+ push_char("<h5 class=\"dogyo-ko-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(1)}\">")
914
+ when MADO_OMIDASHI_COMMAND
915
+ @style_stack.push([command, '</a></h3>'])
916
+ push_char("<h3 class=\"mado-o-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(100)}\">")
917
+ when MADO_NAKAMIDASHI_COMMAND
918
+ @style_stack.push([command, '</a></h4>'])
919
+ push_char("<h4 class=\"mado-naka-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(10)}\">")
920
+ when MADO_KOMIDASHI_COMMAND
921
+ @style_stack.push([command, '</a></h5>'])
922
+ push_char("<h5 class=\"mado-ko-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(1)}\">")
923
+ when PAT_CHARSIZE # font-size command: the captures give the number of steps and the size word
924
+ @style_stack.push([command, '</span>'])
925
+ _whole, nest, style = command.match(PAT_CHARSIZE).to_a
926
+ times = Utils.convert_japanese_number(nest).to_i
927
+ daisho = detect_style_size(style)
928
+ html_class = daisho.to_s + times.to_s
929
+ size = Utils.create_font_size(times, daisho)
930
+ push_char("<span class=\"#{html_class}\" style=\"font-size: #{size};\">")
931
+ else
932
+ ## Decoration ##
933
+ key = command
934
+ filter = ->(x) { x } # identity unless a direction prefix changes the class name below
935
+ if command.match?(PAT_DIRECTION)
936
+ _whole, dir, com = command.match(PAT_DIRECTION).to_a
937
+ # renew command
938
+ key = com
939
+ if command.match?(TEN_MARK)
940
+ case dir
941
+ when LEFT_MARK, UNDER_MARK
942
+ filter = ->(x) { "#{x}_after" }
943
+ end
944
+ elsif command.match?(SEN_MARK)
945
+ case dir
946
+ when LEFT_MARK, OVER_MARK
947
+ filter = ->(x) { x.sub('under', 'over') }
948
+ end
949
+ end
950
+ end
951
+
952
+ found = COMMAND_TABLE[key]
953
+ # found = [class, tag]
954
+ if found
955
+ @style_stack.push([command, "</#{found[1]}>"])
956
+ push_char("<#{found[1]} class=\"#{filter.call(found[0])}\">")
957
+ else
958
+ if $DEBUG
959
+ puts I18n.t(:warn_undefined_command, line_number, key)
960
+ end
961
+ nil
962
+ end
963
+ end
964
+ end
965
+
966
# Handles the closing half of an inline command.
#
# END_MARK is stripped from +command+; what remains decides the action:
# * equal to MAIN_MARK: switch @section to :tail, call ensure_close, set
#   @noprint and print the markup that opens the "after_text" div;
# * matches CHUUKI_COMMAND while the command on top of @style_stack is
#   CHUUKI_COMMAND: pop the stack and emit the ruby closing markup built
#   around the text captured by PAT_INLINE_RUBY;
# * matched by the command on top of @style_stack: pop that entry and emit
#   the closing tag stored in its second element.
#
# @param command [String] the closing command text
# @raise [Aozora2Html::Error] when none of the cases above applies
def exec_inline_end_command(command)
  closing = command.sub(END_MARK, '')

  if closing == MAIN_MARK
    # force to finish main_text
    @section = :tail
    ensure_close
    @noprint = true
    return @out.print("</div>\r\n<div class=\"after_text\">\r\n<hr />\r\n")
  end

  if closing.match?(CHUUKI_COMMAND) && (@style_stack.last_command == CHUUKI_COMMAND)
    # special inline ruby
    @style_stack.pop
    ruby_text = closing.match(PAT_INLINE_RUBY).to_a[1]
    return push_char('</rb><rp>(</rp><rt>'.to_sjis + ruby_text + '</rt><rp>)</rp></ruby>'.to_sjis)
  end

  unless @style_stack.last_command.match?(closing)
    raise Aozora2Html::Error, I18n.t(:invalid_nesting, closing, @style_stack.last_command)
  end

  push_char(@style_stack.pop[1])
end
985
+
986
# Handles a block-opening command.
#
# The leading OPEN_MARK is removed from +command+ in place, then the command
# is tested against each block kind in a fixed order (indent, heading,
# jizume, yokogumi, keigakomi, caption, futoji, shatai, character size).
# Every kind that matches has its tag handed to push_block_tag together with
# a shared buffer. If that buffer is still empty at the end, the untouched
# copy of the command is turned into an editor note; otherwise the buffer is
# pushed onto @tag_stack.
#
# @param command [String] the command text; mutated (OPEN_MARK is stripped)
# @return [Object, nil] the result of apply_rest_notes when nothing matched,
#   otherwise nil
def exec_block_start_command(command)
  source_command = command.dup
  command.sub!(/^#{OPEN_MARK}/o, '')
  match_buf = +''
  # When the buffer is already non-empty, the top of @indent_stack is popped
  # before the next tag is pushed.
  # NOTE(review): presumably push_block_tag appends to match_buf -- confirm.
  drop_pending_indent = -> { @indent_stack.pop unless match_buf.empty? }

  if command.match?(INDENT_TYPE[:jisage])
    push_block_tag(apply_jisage(command), match_buf)
  elsif command.match?(/(#{INDENT_TYPE[:chitsuki]}|#{JIAGE_COMMAND})$/)
    push_block_tag(apply_chitsuki(command, multiline: true), match_buf)
  end

  push_block_tag(apply_midashi(command), match_buf) if command.match?(INDENT_TYPE[:midashi])

  if command.match?(INDENT_TYPE[:jizume])
    drop_pending_indent.call
    push_block_tag(apply_jizume(command), match_buf)
  end

  if command.match?(INDENT_TYPE[:yokogumi])
    drop_pending_indent.call
    push_block_tag(apply_yokogumi(command), match_buf)
  end

  if command.match?(INDENT_TYPE[:keigakomi])
    drop_pending_indent.call
    push_block_tag(apply_keigakomi(command), match_buf)
  end

  if command.match?(INDENT_TYPE[:caption])
    drop_pending_indent.call
    push_block_tag(apply_caption(command), match_buf)
  end

  # Bold and italic blocks share one shape: a MultilineStyle tag plus a
  # symbol of the same name on @indent_stack.
  %w[futoji shatai].each do |style_name|
    next unless command.match?(INDENT_TYPE[style_name.to_sym])

    drop_pending_indent.call
    push_block_tag(Aozora2Html::Tag::MultilineStyle.new(self, style_name), match_buf)
    @indent_stack.push(style_name.to_sym)
  end

  charsize = command.match(PAT_CHARSIZE)
  if charsize
    drop_pending_indent.call
    daisho = detect_style_size(charsize[2])
    times = Utils.convert_japanese_number(charsize[1]).to_i
    push_block_tag(Aozora2Html::Tag::FontSize.new(self, times, daisho), match_buf)
    @indent_stack.push(daisho)
  end

  return apply_rest_notes(source_command) if match_buf.empty?

  @tag_stack.push(match_buf)
  nil
end
1063
+
1064
# Handles a block-closing command.
#
# The leading CLOSE_MARK is removed from +command+ in place. When
# detect_command_mode recognises the remainder, explicit_close is called with
# that mode and the top of @indent_stack is popped. A popped value that is
# not a String additionally resets @terprip to false.
#
# @param command [String] the command text; mutated (CLOSE_MARK is stripped)
# @return [Object, nil] nil when an entry was popped; otherwise the result
#   of apply_rest_notes for the untouched copy of the command
def exec_block_end_command(command)
  source_command = command.dup
  command.sub!(/^#{CLOSE_MARK}/o, '')

  popped = false
  mode = detect_command_mode(command)
  if mode
    explicit_close(mode)
    popped = @indent_stack.pop
  end

  # Unknown mode, or nothing on the stack to close: keep it as an editor note.
  return apply_rest_notes(source_command) unless popped

  @terprip = false unless popped.is_a?(String)
  nil
end
1083
+
1084
# Builds an image tag from an image command.
#
# +raw+ is matched against PAT_IMAGE. On a match an Img tag is created from
# the captured alt text, source, width and height; its CSS class is 'photo'
# when the alt text matches PHOTO_COMMAND and 'illustration' otherwise.
# Without a match, +command+ is kept as an editor note.
#
# @param command [String] command text used for the fallback editor note
# @param raw [String] text matched against PAT_IMAGE
# @return [Object] the Img tag, or the result of apply_rest_notes
def exec_img_command(command, raw)
  image = raw.match(PAT_IMAGE)
  return apply_rest_notes(command) unless image

  alt, src, _wh, width, height = image.captures
  css_class = alt.match?(PHOTO_COMMAND) ? 'photo' : 'illustration'
  Aozora2Html::Tag::Img.new(self, src, css_class, alt, width, height)
end
1098
+
1099
# Handles a command that refers back to text that precedes it.
#
# PAT_FRONTREF splits +command+ into the referenced text and up to two spec
# parts (joined when the first one is present). If the reference is found by
# search_front_reference, exec_style is tried on it; a non-nil result is
# returned directly. Otherwise the found text is handed to
# recovery_front_reference and the command is kept as an editor note.
#
# @param command [String] the command text
# @return [Object] the styled result, or the result of apply_rest_notes
def exec_frontref_command(command)
  _whole, reference, spec_head, spec_tail = command.match(PAT_FRONTREF).to_a
  spec = spec_head ? spec_head + spec_tail : spec_tail

  found = reference && search_front_reference(reference)
  if found
    styled = exec_style(found, spec)
    return styled if styled

    recovery_front_reference(found)
  end

  # comment out?
  apply_rest_notes(command)
end
1118
+
1119
+ # 傍記を並べる用
1120
+ #
1121
# Repeats a side mark so that it can be laid out next to several characters.
#
# @param bouki [String] the mark to repeat
# @param times [Integer] number of copies
# @return [String] +times+ copies of +bouki+ separated by '&nbsp;'
def multiply(bouki, times)
  copies = [bouki] * times
  copies.join('&nbsp;')
end
1125
+
1126
+ # rubyタグの再生成(本体はrearrange_ruby)
1127
+ #
1128
+ # complex ruby wrap up utilities -- don't erase! we will use soon ...
1129
+ #
1130
# Rebuilds ruby markup for +targets+; the work itself is delegated to
# Aozora2Html::Tag::Ruby.rearrange_ruby, with this parser passed as context.
#
# @param targets [Object] the elements to annotate
# @param upper_ruby [String] first ruby text, passed through unchanged
# @param under_ruby [String] second ruby text, passed through unchanged
# @return [Object] whatever Aozora2Html::Tag::Ruby.rearrange_ruby returns
def rearrange_ruby_tag(targets, upper_ruby, under_ruby)
  Aozora2Html::Tag::Ruby.rearrange_ruby(
    self,
    targets,
    upper_ruby,
    under_ruby
  )
end
1133
+
1134
# Wraps +targets+ in the tag that corresponds to +command+.
#
# Checks run in a fixed order and the first one that applies wins:
# 1. kuten2png changes the command: that result is returned as is;
# 2. the simple inline commands (TCY, yokogumi, keigakomi, caption,
#    kaeriten, kunten okurigana);
# 3. headings, where a plain (non-dogyo, non-mado) heading also resets
#    @terprip to false;
# 4. character size;
# 5. ruby variants (PAT_RUBY_DIR, PAT_CHUUKI, PAT_BOUKI);
# 6. decorations looked up in COMMAND_TABLE, optionally with a direction
#    prefix that adjusts the CSS class.
#
# @param targets [Object] the elements the style applies to
# @param command [String] the style command
# @return [Object, nil] the resulting tag, or nil when no rule applies
# @raise [Aozora2Html::Error] when a second ruby is added to a Ruby tag
#   whose under_ruby is already set
def exec_style(targets, command)
  kuten = kuten2png(command)
  return kuten if kuten != command

  return Aozora2Html::Tag::Dir.new(self, targets) if command.match?(TCY_COMMAND)
  return Aozora2Html::Tag::InlineYokogumi.new(self, targets) if command.match?(YOKOGUMI_COMMAND)
  return Aozora2Html::Tag::InlineKeigakomi.new(self, targets) if command.match?(KEIGAKOMI_COMMAND)
  return Aozora2Html::Tag::InlineCaption.new(self, targets) if command.match?(CAPTION_COMMAND)
  return Aozora2Html::Tag::Kaeriten.new(self, targets) if command.match?(KAERITEN_COMMAND)
  return Aozora2Html::Tag::Okurigana.new(self, targets) if command.match?(KUNTEN_OKURIGANA_COMMAND)

  if command.match?(MIDASHI_COMMAND)
    midashi_type =
      if command.match?(DOGYO_MARK)
        :dogyo
      elsif command.match?(MADO_MARK)
        :mado
      else
        @terprip = false
        :normal
      end
    return Aozora2Html::Tag::Midashi.new(self, targets, command, midashi_type)
  end

  charsize = command.match(PAT_CHARSIZE)
  if charsize
    return Aozora2Html::Tag::InlineFontSize.new(self, targets,
                                                Utils.convert_japanese_number(charsize[1]).to_i,
                                                detect_style_size(charsize[2]))
  end

  ruby_dir = command.match(PAT_RUBY_DIR)
  if ruby_dir
    under = ruby_dir[2]
    unless (targets.length == 1) && targets[0].is_a?(Aozora2Html::Tag::Ruby)
      return rearrange_ruby_tag(targets, '', under)
    end

    # A lone Ruby tag gets the extra ruby attached in place.
    ruby_tag = targets[0]
    raise Aozora2Html::Error, I18n.t(:dont_allow_triple_ruby) unless ruby_tag.under_ruby == ''

    ruby_tag.under_ruby = under
    return ruby_tag
  end

  chuuki = PAT_CHUUKI.match(command)
  return rearrange_ruby_tag(targets, chuuki[1], '') if chuuki

  bouki = PAT_BOUKI.match(command)
  return rearrange_ruby_tag(targets, multiply(bouki[1], targets.to_s.length), '') if bouki

  ## direction fix! ##
  css_filter = ->(css) { css }
  direction = command.match(PAT_DIRECTION)
  if direction
    dir = direction[1]
    # renew command
    command = direction[2]
    if command.match?(TEN_MARK)
      case dir
      when LEFT_MARK, UNDER_MARK
        css_filter = ->(css) { "#{css}_after" }
      end
    elsif command.match?(SEN_MARK)
      case dir
      when LEFT_MARK, OVER_MARK
        css_filter = ->(css) { css.sub('under', 'over') }
      end
    end
  end

  # found = [class, tag]
  found = COMMAND_TABLE[command]
  return nil unless found

  Aozora2Html::Tag::Decorate.new(self, targets, css_filter.call(found[0]), found[1])
end
1207
+
1208
# Handles a dakuten-katakana command (codes 1-7-82 to 1-7-85).
#
# The last digit of the code selects an entry of DAKUTEN_KATAKANA_TABLE,
# which is then searched for with search_front_reference. When it is found a
# DakutenKatakana tag is built from it; otherwise the command is kept as an
# editor note.
#
# @param command [String] the command text
# @return [Object] the DakutenKatakana tag, or the result of apply_rest_notes
def apply_dakuten_katakana(command)
  n = command[/1-7-8([2345])/, 1]
  frontref = DAKUTEN_KATAKANA_TABLE[n]
  found = search_front_reference(frontref)
  return apply_rest_notes(command) unless found

  Aozora2Html::Tag::DakutenKatakana.new(self, n, found.join, gaiji_dir: @gaiji_dir)
end
1218
+
1219
+ # くの字点の処理
1220
+ #
1221
+ # くの字点は現状そのまま出力するのでフッタの「表記について」で出力するかどうかのフラグ処理だけ行う
1222
# Records which kunoji-ten repeat marks occur in the text.
#
# Only flags in @chuuki_table are set here; nothing is emitted. The next
# character of @stream decides the flag: NOJI sets :kunoji, and DAKUTEN
# followed by NOJI sets :dakutenkunoji.
def assign_kunoji
  upcoming = @stream.peek_char(0)
  case upcoming
  when NOJI then @chuuki_table[:kunoji] = true
  when DAKUTEN
    @chuuki_table[:dakutenkunoji] = true if @stream.peek_char(1) == NOJI
  end
end
1233
+
1234
# Keeps +command+ in the output as an editor note.
#
# Sets the :chuki flag in @chuuki_table before building the tag.
#
# @param command [String] the command text to preserve
# @return [Aozora2Html::Tag::EditorNote] the note tag wrapping +command+
def apply_rest_notes(command)
  @chuuki_table[:chuki] = true
  note = Aozora2Html::Tag::EditorNote.new(self, command)
  note
end
1238
+
1239
+ # |が来たときは文字種を無視してruby_bufを守らなきゃいけない
1240
# Reads ruby text up to RUBY_END_MARK and appends the resulting ruby markup
# to @buffer.
#
# @ruby_buf.protected is reset to nil first. Empty ruby text is treated as
# an escaped ruby character and the two marks are returned literally.
#
# @return [String, nil] RUBY_BEGIN_MARK + RUBY_END_MARK for empty ruby text,
#   otherwise nil
def apply_ruby
  @ruby_buf.protected = nil
  ruby_text, _raw = read_to_nest(RUBY_END_MARK)

  # escaped ruby character
  return RUBY_BEGIN_MARK + RUBY_END_MARK if ruby_text.length.zero?

  @buffer.concat(@ruby_buf.create_ruby(self, ruby_text))
  nil
end
1252
+
1253
+ # parse_bodyのフッタ版
1254
# Processes one character of the footer section.
#
# The character read is first dispatched on its kind (accent, terminator,
# gaiji, command, kunoji, ruby start), which may replace it with the result
# of the matching handler. The resulting value is then either flushed as a
# line ("\r\n"), used to protect the ruby buffer (RUBY_PREFIX), ignored
# (nil), or escaped and pushed to the output buffer.
#
# Throws :terminate when the character equals @endchar.
def parse_tail
  char = read_char
  needs_check = true

  case char
  when ACCENT_BEGIN
    # The result of read_accent is pushed without the illegal-character check.
    needs_check = false
    char = read_accent
  when @endchar then throw :terminate
  when GAIJI_MARK then char = dispatch_gaiji
  when COMMAND_BEGIN then char = dispatch_aozora_command
  when KU then assign_kunoji
  when RUBY_BEGIN_MARK then char = apply_ruby
  end

  case char
  when "\r\n" then tail_output
  when RUBY_PREFIX
    @ruby_buf.dump_into(@buffer)
    @ruby_buf.protected = true
  when nil
    # noop
  else
    Utils.illegal_char_check(char, line_number) if needs_check
    push_chars(escape_special_chars(char))
  end
end
1288
+
1289
+ # general_outputのフッタ版
1290
# Flushes the current footer line to @out.
#
# The ruby buffer is dumped into @buffer, the buffer is joined into one
# string and replaced by a fresh TextBuffer. The contact address and the
# Aozora Bunko site name are turned into links. The line is printed followed
# by "\r\n" when it already ends in block-level markup (or consists of a
# single tag), and by "<br />\r\n" otherwise.
def tail_output
  @ruby_buf.dump_into(@buffer)
  line = @buffer.join
  @buffer = TextBuffer.new

  line.gsub!('info@aozora.gr.jp', '<a href="mailto: info@aozora.gr.jp">info@aozora.gr.jp</a>')
  line.gsub!('青空文庫(http://www.aozora.gr.jp/)'.to_sjis) { |hit| "<a href=\"http://www.aozora.gr.jp/\">#{hit}</a>" }

  ends_with_markup = line.match?(%r{(<br />$|</p>$|</h\d>$|<div.*>$|</div>$|^<[^>]*>$)})
  terminator = ends_with_markup ? "\r\n" : "<br />\r\n"
  @out.print line, terminator
end
1302
+
1303
+ # `●表記について`で使用した注記等を出力する
1304
# Prints the "notation notes" section to @out.
#
# The section opens a div with class "notation_notes" and an unordered list.
# One fixed item about the XHTML format is always printed; further items
# depend on flags in @chuuki_table (:chuki, :kunoji, :dakutenkunoji, :newjis,
# :accent). The :newjis and :accent items are only printed when the
# corresponding tag class reports use_jisx0213 as false. When @images has at
# least one entry, a table with one row per entry follows the list;
# otherwise only the list is closed. The div is closed last.
#
# NOTE(review): the whitespace inside the multi-line row template below is
# part of the emitted output; it is left exactly as found here.
+ def hyoki
1305
+ # <br /> times fix
1306
+ @out.print "<br />\r\n</div>\r\n<div class=\"notation_notes\">\r\n<hr />\r\n<br />\r\n●表記について<br />\r\n<ul>\r\n".to_sjis
1307
+ @out.print "\t<li>このファイルは W3C 勧告 XHTML1.1 にそった形式で作成されています。</li>\r\n".to_sjis
1308
+ if @chuuki_table[:chuki]
1309
+ @out.print "\t<li>[#…]は、入力者による注を表す記号です。</li>\r\n".to_sjis
1310
+ end
# Three variants of the kunoji-ten item: both marks, plain only, dakuten only.
1311
+ if @chuuki_table[:kunoji]
1312
+ if @chuuki_table[:dakutenkunoji]
1313
+ @out.printf("\t<li>「くの字点」は「%s」で、「濁点付きくの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + NOJI, KU + DAKUTEN + NOJI)
1314
+ else
1315
+ @out.printf("\t<li>「くの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + NOJI)
1316
+ end
1317
+ elsif @chuuki_table[:dakutenkunoji]
1318
+ @out.printf("\t<li>「濁点付きくの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + DAKUTEN + NOJI)
1319
+ end
1320
+ if @chuuki_table[:newjis] && !Aozora2Html::Tag::EmbedGaiji.use_jisx0213
1321
+ @out.print "\t<li>「くの字点」をのぞくJIS X 0213にある文字は、画像化して埋め込みました。</li>\r\n".to_sjis
1322
+ end
1323
+ if @chuuki_table[:accent] && !Aozora2Html::Tag::Accent.use_jisx0213
1324
+ @out.print "\t<li>アクセント符号付きラテン文字は、画像化して埋め込みました。</li>\r\n".to_sjis
1325
+ end
# Each @images entry is split into its first element (k) and the remaining
# elements, which are joined with '、' for the second column.
1326
+ if @images[0]
1327
+ @out.print "\t<li>この作品には、JIS X 0213にない、以下の文字が用いられています。(数字は、底本中の出現「ページ-行」数。)これらの文字は本文内では「※[#…]」の形で示しました。</li>\r\n</ul>\r\n<br />\r\n\t\t<table class=\"gaiji_list\">\r\n".to_sjis
1328
+ @images.each do |cell|
1329
+ k, *v = cell
1330
+ vs = v.join('、'.to_sjis)
1331
+ @out.print " <tr>
1332
+ <td>
1333
+ #{k}
1334
+ </td>
1335
+ <td>&nbsp;&nbsp;</td>
1336
+ <td>
1337
+ #{vs} </td>
1338
+ <!--
1339
+ <td>
1340
+ " + '  '.to_sjis + "<img src=\"../../../gaiji/others/xxxx.png\" alt=\"#{k}\" width=32 height=32 />
1341
+ </td>
1342
+ -->
1343
+ </tr>
1344
+ ".to_sjis
1345
+ end
1346
+ @out.print "\t\t</table>\r\n".to_sjis
1347
+ else
1348
+ @out.print "</ul>\r\n" # fixed: elements other than <li> are not valid inside <ul>, so the list is closed here
1349
+ end
1350
+ @out.print "</div>\r\n"
1351
+ end
1352
+
1353
+ # The original Aozora2Html#push_chars does not convert "'" into '&#39;'; this matches the old behavior of CGI.escapeHTML().
1354
# Escapes the HTML special characters &, ", < and > in a string.
#
# The single quote is deliberately left untouched. Values that are not
# Strings are returned unchanged.
#
# @param char [Object] the value to escape
# @return [Object] the escaped copy for a String, otherwise +char+ itself
def escape_special_chars(char)
  return char unless char.is_a?(String)

  entities = { '&' => '&amp;', '"' => '&quot;', '<' => '&lt;', '>' => '&gt;' }
  char.gsub(/[&"<>]/, entities)
end
10
1361
  end