aozora2html 2.0.0 → 3.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rubocop.yml +19 -0
  3. data/.github/workflows/ruby.yml +5 -3
  4. data/.gitignore +1 -0
  5. data/.rubocop.yml +38 -151
  6. data/.rubocop_todo.yml +7 -0
  7. data/CHANGELOG.md +41 -0
  8. data/Gemfile +2 -0
  9. data/Guardfile +3 -1
  10. data/HACKING.md +45 -0
  11. data/README.md +14 -6
  12. data/Rakefile +12 -5
  13. data/aozora2html.gemspec +25 -23
  14. data/bin/aozora2html +21 -19
  15. data/lib/aozora2html/accent_parser.rb +62 -54
  16. data/lib/aozora2html/error.rb +5 -4
  17. data/lib/aozora2html/header.rb +20 -18
  18. data/lib/aozora2html/i18n.rb +40 -20
  19. data/lib/aozora2html/midashi_counter.rb +32 -0
  20. data/lib/aozora2html/ruby_buffer.rb +63 -28
  21. data/lib/aozora2html/string_refinements.rb +36 -0
  22. data/lib/aozora2html/style_stack.rb +6 -0
  23. data/lib/aozora2html/tag/accent.rb +10 -12
  24. data/lib/aozora2html/tag/block.rb +11 -9
  25. data/lib/aozora2html/tag/chitsuki.rb +6 -2
  26. data/lib/aozora2html/tag/dakuten_katakana.rb +10 -8
  27. data/lib/aozora2html/tag/decorate.rb +4 -3
  28. data/lib/aozora2html/tag/dir.rb +4 -2
  29. data/lib/aozora2html/tag/editor_note.rb +7 -4
  30. data/lib/aozora2html/tag/embed_gaiji.rb +15 -11
  31. data/lib/aozora2html/tag/font_size.rb +5 -2
  32. data/lib/aozora2html/tag/gaiji.rb +4 -3
  33. data/lib/aozora2html/tag/img.rb +4 -4
  34. data/lib/aozora2html/tag/indent.rb +3 -3
  35. data/lib/aozora2html/tag/inline.rb +10 -7
  36. data/lib/aozora2html/tag/inline_caption.rb +4 -2
  37. data/lib/aozora2html/tag/inline_font_size.rb +4 -3
  38. data/lib/aozora2html/tag/inline_keigakomi.rb +4 -2
  39. data/lib/aozora2html/tag/inline_yokogumi.rb +4 -3
  40. data/lib/aozora2html/tag/jisage.rb +3 -1
  41. data/lib/aozora2html/tag/jizume.rb +3 -0
  42. data/lib/aozora2html/tag/kaeriten.rb +4 -2
  43. data/lib/aozora2html/tag/keigakomi.rb +15 -9
  44. data/lib/aozora2html/tag/kunten.rb +4 -4
  45. data/lib/aozora2html/tag/midashi.rb +3 -1
  46. data/lib/aozora2html/tag/multiline.rb +3 -0
  47. data/lib/aozora2html/tag/multiline_caption.rb +6 -8
  48. data/lib/aozora2html/tag/multiline_chitsuki.rb +3 -1
  49. data/lib/aozora2html/tag/multiline_jisage.rb +3 -1
  50. data/lib/aozora2html/tag/multiline_midashi.rb +6 -3
  51. data/lib/aozora2html/tag/multiline_style.rb +5 -3
  52. data/lib/aozora2html/tag/multiline_yokogumi.rb +6 -9
  53. data/lib/aozora2html/tag/okurigana.rb +4 -2
  54. data/lib/aozora2html/tag/oneline_chitsuki.rb +3 -2
  55. data/lib/aozora2html/tag/oneline_indent.rb +8 -1
  56. data/lib/aozora2html/tag/oneline_jisage.rb +3 -0
  57. data/lib/aozora2html/tag/reference_mentioned.rb +22 -21
  58. data/lib/aozora2html/tag/ruby.rb +174 -70
  59. data/lib/aozora2html/tag/un_embed_gaiji.rb +8 -2
  60. data/lib/aozora2html/tag.rb +40 -38
  61. data/lib/aozora2html/tag_parser.rb +23 -16
  62. data/lib/aozora2html/text_buffer.rb +50 -0
  63. data/lib/aozora2html/utils.rb +113 -50
  64. data/lib/aozora2html/version.rb +3 -1
  65. data/lib/aozora2html/yaml_loader.rb +8 -2
  66. data/lib/aozora2html/zip.rb +4 -0
  67. data/lib/aozora2html.rb +1329 -3
  68. data/lib/extensions.rb +2 -34
  69. data/lib/jstream.rb +96 -25
  70. data/sample/chukiichiran_kinyurei.html +42 -29
  71. data/sample/chukiichiran_kinyurei.txt +16 -3
  72. metadata +80 -83
  73. data/.travis.yml +0 -12
  74. data/lib/t2hs.rb +0 -1607
  75. data/test/test_aozora2html.rb +0 -379
  76. data/test/test_aozora_accent_parser.rb +0 -31
  77. data/test/test_command_parse.rb +0 -213
  78. data/test/test_compat.rb +0 -11
  79. data/test/test_dakuten_katakana_tag.rb +0 -27
  80. data/test/test_decorate_tag.rb +0 -24
  81. data/test/test_dir_tag.rb +0 -24
  82. data/test/test_editor_note_tag.rb +0 -22
  83. data/test/test_exception.rb +0 -23
  84. data/test/test_font_size_tag.rb +0 -41
  85. data/test/test_gaiji_tag.rb +0 -49
  86. data/test/test_header.rb +0 -60
  87. data/test/test_helper.rb +0 -5
  88. data/test/test_i18n.rb +0 -23
  89. data/test/test_img_tag.rb +0 -23
  90. data/test/test_inline_caption_tag.rb +0 -24
  91. data/test/test_inline_font_size_tag.rb +0 -34
  92. data/test/test_inline_keigakomi_tag.rb +0 -24
  93. data/test/test_inline_yokogumi_tag.rb +0 -24
  94. data/test/test_jizume_tag.rb +0 -25
  95. data/test/test_jstream.rb +0 -57
  96. data/test/test_kaeriten_tag.rb +0 -24
  97. data/test/test_keigakomi_tag.rb +0 -31
  98. data/test/test_midashi_tag.rb +0 -38
  99. data/test/test_multiline_caption_tag.rb +0 -25
  100. data/test/test_multiline_midashi_tag.rb +0 -55
  101. data/test/test_multiline_style_tag.rb +0 -25
  102. data/test/test_multiline_yokogumi_tag.rb +0 -25
  103. data/test/test_okurigana_tag.rb +0 -24
  104. data/test/test_ruby_parse.rb +0 -130
  105. data/test/test_ruby_tag.rb +0 -24
  106. data/test/test_tag_parser.rb +0 -80
data/lib/aozora2html.rb CHANGED
@@ -1,6 +1,1332 @@
1
- require "aozora2html/version"
2
- require 't2hs'
1
+ require_relative 'aozora2html/version'
2
+ require_relative 'extensions'
3
+ require_relative 'aozora2html/error'
4
+ require_relative 'aozora2html/i18n'
5
+ require_relative 'aozora2html/midashi_counter'
6
+ require_relative 'jstream'
7
+ require_relative 'aozora2html/tag'
8
+ require_relative 'aozora2html/tag_parser'
9
+ require_relative 'aozora2html/accent_parser'
10
+ require_relative 'aozora2html/style_stack'
11
+ require_relative 'aozora2html/header'
12
+ require_relative 'aozora2html/ruby_buffer'
13
+ require_relative 'aozora2html/text_buffer'
14
+ require_relative 'aozora2html/yaml_loader'
15
+ require_relative 'aozora2html/utils'
16
+ require_relative 'aozora2html/string_refinements'
3
17
 
4
- ## already defined in t2hs.rb
18
+ # 青空文庫形式のテキストファイルを html に整形する ruby スクリプト
19
+ # 変換器本体
5
20
  class Aozora2Html
21
+ # 全角バックスラッシュが出せないから直打ち
22
+ KU = ['18e5'].pack('h*').force_encoding('shift_jis')
23
+ NOJI = ['18f5'].pack('h*').force_encoding('shift_jis')
24
+ DAKUTEN = ['18d8'].pack('h*').force_encoding('shift_jis')
25
+
26
+ using StringRefinements
27
+
28
+ GAIJI_MARK = '※'.to_sjis
29
+ IGETA_MARK = '#'.to_sjis
30
+ RUBY_BEGIN_MARK = '《'.to_sjis
31
+ RUBY_END_MARK = '》'.to_sjis
32
+ PAREN_BEGIN_MARK = '('.to_sjis
33
+ PAREN_END_MARK = ')'.to_sjis
34
+ SIZE_SMALL = '小'.to_sjis
35
+ SIZE_MIDDLE = '中'.to_sjis
36
+ SIZE_LARGE = '大'.to_sjis
37
+ TEIHON_MARK = '底本:'.to_sjis
38
+ COMMAND_BEGIN = '['.to_sjis
39
+ COMMAND_END = ']'.to_sjis
40
+ ACCENT_BEGIN = '〔'.to_sjis
41
+ ACCENT_END = '〕'.to_sjis
42
+ AOZORABUNKO = '青空文庫'.to_sjis
43
+ # PAT_EDITOR = /[校訂|編|編集|編集校訂|校訂編集]$/
44
+ PAT_EDITOR = '(校訂|編|編集)$'.to_sjis
45
+ PAT_HENYAKU = '編訳$'.to_sjis
46
+ PAT_TRANSLATOR = '訳$'.to_sjis
47
+ RUBY_PREFIX = '|'.to_sjis
48
+ PAT_RUBY = /#{'《.*?》'.to_sjis}/.freeze
49
+ PAT_DIRECTION = '(右|左|上|下)に(.*)'.to_sjis
50
+ PAT_REF = '^「.+」'.to_sjis
51
+ CHUUKI_COMMAND = '注記付き'.to_sjis
52
+ TCY_COMMAND = '縦中横'.to_sjis
53
+ KEIGAKOMI_COMMAND = '罫囲み'.to_sjis
54
+ YOKOGUMI_COMMAND = '横組み'.to_sjis
55
+ CAPTION_COMMAND = 'キャプション'.to_sjis
56
+ WARIGAKI_COMMAND = '割書'.to_sjis
57
+ KAERITEN_COMMAND = '返り点'.to_sjis
58
+ KUNTEN_OKURIGANA_COMMAND = '訓点送り仮名'.to_sjis
59
+ MIDASHI_COMMAND = '見出し'.to_sjis
60
+ OMIDASHI_COMMAND = '大見出し'.to_sjis
61
+ NAKAMIDASHI_COMMAND = '中見出し'.to_sjis
62
+ KOMIDASHI_COMMAND = '小見出し'.to_sjis
63
+ DOGYO_OMIDASHI_COMMAND = '同行大見出し'.to_sjis
64
+ DOGYO_NAKAMIDASHI_COMMAND = '同行中見出し'.to_sjis
65
+ DOGYO_KOMIDASHI_COMMAND = '同行小見出し'.to_sjis
66
+ MADO_OMIDASHI_COMMAND = '窓大見出し'.to_sjis
67
+ MADO_NAKAMIDASHI_COMMAND = '窓中見出し'.to_sjis
68
+ MADO_KOMIDASHI_COMMAND = '窓小見出し'.to_sjis
69
+ LEFT_MARK = '左'.to_sjis
70
+ UNDER_MARK = '下'.to_sjis
71
+ OVER_MARK = '上'.to_sjis
72
+ MAIN_MARK = '本文'.to_sjis
73
+ END_MARK = '終わり'.to_sjis
74
+ TEN_MARK = '点'.to_sjis
75
+ SEN_MARK = '線'.to_sjis
76
+ OPEN_MARK = 'ここから'.to_sjis
77
+ CLOSE_MARK = 'ここで'.to_sjis
78
+ MADE_MARK = 'まで'.to_sjis
79
+ DOGYO_MARK = '同行'.to_sjis
80
+ MADO_MARK = '窓'.to_sjis
81
+ JIAGE_COMMAND = '字上げ'.to_sjis
82
+ JISAGE_COMMAND = '字下げ'.to_sjis
83
+ PHOTO_COMMAND = '写真'.to_sjis
84
+ ORIKAESHI_COMMAND = '折り返して'.to_sjis
85
+ ONELINE_COMMAND = 'この行'.to_sjis
86
+ NON_0213_GAIJI = '非0213外字'.to_sjis
87
+ WARICHU_COMMAND = '割り注'.to_sjis
88
+ TENTSUKI_COMMAND = '天付き'.to_sjis
89
+ PAT_REST_NOTES = '(左|下)に「(.*)」の(ルビ|注記|傍記)'.to_sjis
90
+ PAT_KUTEN = /#{'「※」[は|の]'.to_sjis}/.freeze
91
+ PAT_KUTEN_DUAL = '※.*※'.to_sjis
92
+ PAT_GAIJI = '(?:#)(.*)(?:、)(.*)'.to_sjis
93
+ PAT_KAERITEN = '^([一二三四五六七八九十レ上中下甲乙丙丁天地人]+)$'.to_sjis
94
+ PAT_OKURIGANA = '^((.+))$'.to_sjis
95
+ PAT_REMOVE_OKURIGANA = /#{'[()]'.to_sjis}/.freeze
96
+ PAT_CHITSUKI = /#{'(地付き|字上げ)(終わり)*$'.to_sjis}/.freeze
97
+ PAT_ORIKAESHI_JISAGE = '折り返して(\\d*)字下げ'.to_sjis
98
+ PAT_ORIKAESHI_JISAGE2 = '(\\d*)字下げ、折り返して(\\d*)字下げ'.to_sjis
99
+ PAT_JI_LEN = '([0-9]+)字'.to_sjis
100
+ PAT_INLINE_RUBY = '「(.*)」の注記付き'.to_sjis
101
+ PAT_IMAGE = '(.*)((fig.+\\.png)(、横([0-9]+)×縦([0-9]+))*)入る'.to_sjis
102
+ PAT_FRONTREF = '「([^「」]*(?:「.+」)*[^「」]*)」[にはの](「.+」の)*(.+)'.to_sjis
103
+ PAT_RUBY_DIR = '(左|下)に「([^」]*)」の(ルビ|注記)'.to_sjis
104
+ PAT_CHUUKI = /#{'「(.+?)」の注記'.to_sjis}/.freeze
105
+ PAT_BOUKI = /#{'「(.)」の傍記'.to_sjis}/.freeze
106
+ PAT_CHARSIZE = /#{'(.*)段階(..)な文字'.to_sjis}/.freeze
107
+
108
+ REGEX_HIRAGANA = Regexp.new('[ぁ-んゝゞ]'.to_sjis)
109
+ REGEX_KATAKANA = Regexp.new('[ァ-ンーヽヾヴ]'.to_sjis)
110
+ REGEX_ZENKAKU = Regexp.new('[0-9A-Za-zΑ-Ωα-ωА-Яа-я−&’,.]'.to_sjis)
111
+ REGEX_HANKAKU = Regexp.new("[A-Za-z0-9#\\-\\&'\\,]".to_sjis)
112
+ REGEX_KANJI = Regexp.new('[亜-熙々※仝〆〇ヶ]'.to_sjis)
113
+
114
+ DYNAMIC_CONTENTS = "<div id=\"card\">\r\n<hr />\r\n<br />\r\n<a href=\"JavaScript:goLibCard();\" id=\"goAZLibCard\">●図書カード</a><script type=\"text/javascript\" src=\"../../contents.js\"></script>\r\n<script type=\"text/javascript\" src=\"../../golibcard.js\"></script>\r\n</div>".to_sjis
115
+
116
+ # KUNOJI = ["18e518f5"].pack("h*")
117
+ # utf8 ["fecbf8fecbcb"].pack("h*")
118
+ # DAKUTENKUNOJI = ["18e518d818f5"].pack("h*")
119
+ # utf8 ["fecbf82e083bfecbcb"].pack("h*")
120
+
121
+ loader = Aozora2Html::YamlLoader.new(File.dirname(__FILE__))
122
+ ACCENT_TABLE = loader.load('../yml/accent_table.yml')
123
+
124
+ # [class, tag]
125
+ COMMAND_TABLE = loader.load('../yml/command_table.yml')
126
+ JIS2UCS = loader.load('../yml/jis2ucs.yml')
127
+
128
+ INDENT_TYPE = {
129
+ jisage: '字下げ'.to_sjis,
130
+ chitsuki: '地付き'.to_sjis,
131
+ midashi: '見出し'.to_sjis,
132
+ jizume: '字詰め'.to_sjis,
133
+ yokogumi: '横組み'.to_sjis,
134
+ keigakomi: '罫囲み'.to_sjis,
135
+ caption: 'キャプション'.to_sjis,
136
+ futoji: '太字'.to_sjis,
137
+ shatai: '斜体'.to_sjis,
138
+ dai: '大きな文字'.to_sjis,
139
+ sho: '小さな文字'.to_sjis
140
+ }.freeze
141
+
142
+ DAKUTEN_KATAKANA_TABLE = {
143
+ '2' => 'ワ゛'.to_sjis,
144
+ '3' => 'ヰ゛'.to_sjis,
145
+ '4' => 'ヱ゛'.to_sjis,
146
+ '5' => 'ヲ゛'.to_sjis
147
+ }.freeze
148
+
149
# Prepare the converter state for a single conversion run.
#
# @param input [#read, String] a readable IO, or a path that is opened as binary Shift_JIS
# @param output [#print, String] a writable IO, or a path that is opened for writing
# @param gaiji_dir [String, nil] gaiji image directory; falls back to '../../../gaiji/'
# @param css_files [Array<String>, nil] stylesheet paths; falls back to ['../../aozora.css']
def initialize(input, output, gaiji_dir: nil, css_files: nil)
  # Anything that responds to #read is treated as an already-open IO.
  source = input.respond_to?(:read) ? input : File.open(input, 'rb:Shift_JIS')
  @stream = Jstream.new(source)
  # Anything that responds to #print is treated as an already-open sink.
  @out = output.respond_to?(:print) ? output : File.open(output, 'w')
  @gaiji_dir = gaiji_dir || '../../../gaiji/'
  @css_files = css_files || ['../../aozora.css']

  @buffer = TextBuffer.new
  @ruby_buf = RubyBuffer.new
  # Section currently being processed (:head, :head_end, :chuuki, :chuuki_in, :body, :tail)
  @section = :head
  # Collected header lines
  @header = Aozora2Html::Header.new(css_files: @css_files)
  # Stack of open inline styles
  @style_stack = StyleStack.new
  # Remembers which notes have to be emitted at the end
  @chuuki_table = {}
  # Gaiji (external character) entries that were used
  @images = []
  # Normally holds symbols; for hanging indents it holds the div tag string
  @indent_stack = []
  @tag_stack = []
  # Heading counter; the increment depends on the heading kind
  @midashi_counter = MidashiCounter.new(0)
  # Line-break control flag (the name "terpri" presumably comes from Lisp)
  @terprip = true
  # Terminator that ends parsing; AccentParser and TagParser use a different one
  @endchar = :eof
  # When set, nothing is printed for the line end that was just read
  @noprint = nil
end
177
+
178
# Current line position as reported by the input stream (`@stream.line`).
def line_number
  @stream.line
end
181
+
182
# Whether a block-level command may appear here.
#
# Checking that no inline tag is still open (empty style stack) is sufficient.
#
# @return [Boolean]
def block_allowed_context?
  @style_stack.empty?
end
186
+
187
+ # parseする
188
+ #
189
+ # 終了時(終端まで来た場合)にはthrow :terminateで脱出する
190
+ #
191
# Run the whole conversion: parse, then emit the tail, footer and close the streams.
#
# The parser leaves its loop with `throw :terminate` once the end is reached.
# An Aozora2Html::Error raised while parsing is printed with the current line
# number and the process exits with status 2. Any other StandardError is
# reported with the line number and re-raised.
def process
  catch(:terminate) do
    begin
      parse
    rescue Aozora2Html::Error => e
      puts e.message(line_number)
      exit(2)
    end
  end
  tail_output # final call
  finalize
  close
rescue StandardError => e
  puts "ERROR: line: #{line_number}"
  raise e
end
207
+
208
# Ask the heading counter for a fresh id.
#
# @param size [Integer] value forwarded to MidashiCounter#generate_id
def new_midashi_id(size)
  @midashi_counter.generate_id(size)
end
211
+
212
# Try to turn a gaiji description containing a men-ku-ten code into an embedded gaiji tag.
#
# The description is the input with PAT_KUTEN removed. When it contains a code
# of the form `[12]-dd-dd`, and is neither marked as a non-0213 gaiji nor a
# dual reference, an EmbedGaiji tag is built and `@chuuki_table[:newjis]` is set.
#
# @param substring [String] raw command text
# @return [Aozora2Html::Tag::EmbedGaiji, String] the tag, or +substring+ unchanged when not convertible
def kuten2png(substring)
  desc = substring.gsub(PAT_KUTEN, '')
  matched = desc.match(/[12]-\d{1,2}-\d{1,2}/)
  return substring unless matched && !desc.match?(NON_0213_GAIJI) && !desc.match?(PAT_KUTEN_DUAL)

  @chuuki_table[:newjis] = true
  # Parse the parts as decimal explicitly: handing strings such as "08" to
  # sprintf('%d') raises because a leading zero is read as an octal prefix.
  men, ku, ten = matched[0].split('-').map(&:to_i)
  folder = format('%1d-%02d', men, ku)
  code = format('%1d-%02d-%02d', men, ku, ten)
  # Non-destructive gsub: gsub! returns nil when the description has no IGETA_MARK.
  name = desc.gsub(IGETA_MARK, '')
  Aozora2Html::Tag::EmbedGaiji.new(self, folder, code, name, gaiji_dir: @gaiji_dir)
end
225
+
226
+ # コマンド文字列からモードのシンボルを取り出す
227
+ #
228
+ # @return [Symbol]
229
+ #
230
# Derive the mode symbol from a command string.
#
# A "chitsuki ... end" or "jiage ... end" command maps to :chitsuki; otherwise
# the first INDENT_TYPE entry whose text matches the command wins.
#
# @param command [String]
# @return [Symbol, nil] nil when no INDENT_TYPE entry matches
def detect_command_mode(command)
  closes_chitsuki = [INDENT_TYPE[:chitsuki], JIAGE_COMMAND].any? do |word|
    command.match?(word + END_MARK)
  end
  return :chitsuki if closes_chitsuki

  INDENT_TYPE.each_key.find { |key| command.match?(INDENT_TYPE[key]) }
end
242
+
243
+ private
244
+
245
+ # 一文字読み込む
246
# Read a single character from the input stream.
def read_char
  @stream.read_char
end
249
+
250
+ # 一行読み込む
251
# Read a single line from the input stream.
def read_line
  @stream.read_line
end
254
+
255
# Hand the stream to an AccentParser that stops at ACCENT_END and return its result.
#
# The parser shares this converter's note table, gaiji list and gaiji directory.
def read_accent
  accent_parser = Aozora2Html::AccentParser.new(@stream, ACCENT_END, @chuuki_table, @images, gaiji_dir: @gaiji_dir)
  accent_parser.process
end
258
+
259
# Hand the stream to a TagParser that stops at +endchar+ and return its result.
#
# The parser shares this converter's note table, gaiji list and gaiji directory.
#
# @param endchar [String] terminator for the nested parse
def read_to_nest(endchar)
  nested_parser = Aozora2Html::TagParser.new(@stream, endchar, @chuuki_table, @images, gaiji_dir: @gaiji_dir)
  nested_parser.process
end
262
+
263
# Emit the document footer: notation notes, the dynamic-contents block,
# then the closing body/html tags.
def finalize
  hyoki
  dynamic_contents
  @out.print("</body>\r\n</html>\r\n")
end
268
+
269
# Print the fixed DYNAMIC_CONTENTS HTML fragment to the output.
def dynamic_contents
  @out.print(DYNAMIC_CONTENTS)
end
272
+
273
# Close the input stream first, then the output.
def close
  @stream.close
  @out.close
end
277
+
278
+ # 記法のシンボル名から文字列へ変換する
279
+ # シンボルが見つからなければそのまま返す
280
# Translate a notation symbol into its display string via INDENT_TYPE.
#
# @param type [Symbol, Object]
# @return [Object] the INDENT_TYPE text, or +type+ itself when it is not a known key
def convert_indent_type(type)
  INDENT_TYPE.fetch(type, type)
end
283
+
284
# Compare +type+ with the innermost open indent.
#
# A String on top of the indent stack (a hanging-indent div tag) counts as
# :jisage and also sets @noprint.
#
# @param type [Symbol]
# @return [nil, Object] nil when it matches; otherwise the converted name of +type+
def check_close_match(type)
  current = @indent_stack.last
  if current.is_a?(String)
    @noprint = true
    current = :jisage
  end
  current == type ? nil : convert_indent_type(type)
end
297
+
298
# Close the innermost block implicitly when a new block of +type+ starts.
#
# Does nothing when no block is open, or when check_close_match reports a
# different type (nested multiline tags are allowed to continue). Otherwise
# the indent entry is popped and the pending closing tag, if any, is pushed.
#
# @param type [Symbol]
# @return [Object, nil] result of push_chars when a tag was closed, else nil
def implicit_close(type)
  return unless @indent_stack.last
  # A non-nil answer means "different type": nested, so leave it open.
  return if check_close_match(type)

  @indent_stack.pop
  closing = @tag_stack.pop
  push_chars(closing) if closing
end
312
+
313
+ # 本文が終わってよいかチェックし、終わっていなければ例外をあげる
314
# Verify that the body may end here.
#
# @raise [Aozora2Html::Error] when an indent block is still open
def ensure_close
  open_block = @indent_stack.last
  return unless open_block

  raise Aozora2Html::Error, I18n.t(:terminate_in_style, convert_indent_type(open_block))
end
320
+
321
# Close a block on an explicit closing command of +type+.
#
# Popping @indent_stack is left to the caller; only the pending closing tag
# is taken from @tag_stack and pushed.
#
# @param type [Symbol]
# @raise [Aozora2Html::Error] when +type+ does not match the innermost open block
def explicit_close(type)
  mismatch = check_close_match(type)
  raise Aozora2Html::Error, I18n.t(:invalid_closing, mismatch, mismatch) if mismatch

  closing = @tag_stack.pop
  push_chars(closing) if closing
end
332
+
333
+ # main loop
334
# Main loop: keep dispatching on the current section until something
# throws or raises out of it.
#
# @raise [Aozora2Html::Error] when @section holds an unknown value
def parse
  loop do
    case @section
    when :head then parse_header
    when :head_end then judge_chuuki
    when :chuuki, :chuuki_in then parse_chuuki
    when :body then parse_body
    when :tail then parse_tail
    else raise Aozora2Html::Error, 'encount undefined condition'
    end
  end
end
352
+
353
# Decide whether a notes section follows the header.
#
# Peeks past leading '-' characters without consuming anything. A line made
# only of hyphens switches to :chuuki; an immediate line end switches to
# :body; anything else switches to :body and prints a line break.
def judge_chuuki
  hyphens = 0
  loop do
    case @stream.peek_char(hyphens)
    when '-'
      hyphens += 1
    when "\r\n"
      @section = hyphens == 0 ? :body : :chuuki
      return
    else
      @section = :body
      @out.print("<br />\r\n")
      return
    end
  end
end
374
+
375
+ # headerは一行ずつ読む
376
# Consume one header line.
#
# An empty line ends the header: the section becomes :head_end and the
# collected header is printed as HTML. Any other line has ruby prefixes and
# ruby annotations stripped in place and is appended to the header.
def parse_header
  line = read_line
  if line == ''
    @section = :head_end
    @out.print @header.to_html
    return
  end

  line.gsub!(RUBY_PREFIX, '')
  line.gsub!(PAT_RUBY, '')
  @header.push(line)
end
388
+
389
# Consume one line of the notes section.
#
# Only lines consisting solely of hyphens matter: the first one moves
# :chuuki to :chuuki_in, the next one moves :chuuki_in to :body.
def parse_chuuki
  line = read_line
  return unless line.match?(/^-+$/)

  following = { chuuki: :chuuki_in, chuuki_in: :body }[@section]
  @section = following if following
end
400
+
401
+ # 本体解析部
402
+ #
403
+ # 1文字ずつ読み込み、dispatchして@buffer,@ruby_bufへしまう
404
+ # 改行コードに当たったら溜め込んだものをgeneral_outputする
405
+ #
406
# Body parser step.
#
# Reads one character, dispatches on the special marker characters, and
# stores the outcome in @buffer / @ruby_buf. Hitting a line end flushes what
# has been collected through general_output.
def parse_body
  char = read_char
  needs_check = true

  case char
  when ACCENT_BEGIN
    needs_check = false
    char = read_accent
  when TEIHON_MARK[0]
    # Only a line that starts with the marker can begin the footer.
    ending_check if @buffer.length == 0
  when GAIJI_MARK
    char = dispatch_gaiji
  when COMMAND_BEGIN
    char = dispatch_aozora_command
  when KU
    assign_kunoji
  when RUBY_BEGIN_MARK
    char = apply_ruby
  end

  case char
  when "\r\n"
    general_output
  when RUBY_PREFIX
    @ruby_buf.dump_into(@buffer)
    @ruby_buf.protected = true
  when @endchar
    # suddenly finished the file
    puts I18n.t(:warn_unexpected_terminator, line_number)
    throw :terminate
  when nil
    # nothing to store
  else
    Utils.illegal_char_check(char, line_number) if needs_check
    push_chars(escape_special_chars(char))
  end
end
446
+
447
+ # 本文が終了したかどうかチェックする
448
+ #
449
+ #
450
# Check whether the body has ended.
#
# When the next two characters complete TEIHON_MARK, switch to the footer
# (:tail), verify that no block is left open, and open the bibliographical
# information div.
def ending_check
  rest_matches = @stream.peek_char(0) == TEIHON_MARK[1] && @stream.peek_char(1) == TEIHON_MARK[2]
  return unless rest_matches

  @section = :tail
  ensure_close
  @out.print "</div>\r\n<div class=\"bibliographical_information\">\r\n<hr />\r\n<br />\r\n"
end
458
+
459
# Push +obj+ into the buffers one element at a time.
#
# Arrays are walked recursively, strings are split into characters, and any
# other object is pushed as a single item.
def push_chars(obj)
  if obj.is_a?(Array)
    obj.each { |item| push_chars(item) }
  elsif obj.is_a?(String)
    obj.each_char { |ch| push_char(ch) }
  else
    push_char(obj)
  end
end
473
+
474
# Push a single item through the ruby buffer, which is given @buffer as well.
def push_char(char)
  @ruby_buf.push_char(char, @buffer)
end
477
+
478
+ # 読み込んだ行の出力を行う
479
+ #
480
+ # parserが改行文字を読み込んだら呼ばれる。
481
+ # 最終的に@ruby_bufと@bufferは初期化する
482
+ #
483
+ # @return [void]
484
+ #
485
# Print the line that has been read so far.
#
# Called when the parser reads a line end. @ruby_buf is dumped into @buffer
# and @buffer is replaced by a fresh TextBuffer.
#
# @return [void]
# @raise [Aozora2Html::Error] when an inline style is still open at the line end
def general_output
  raise Aozora2Html::Error, I18n.t(:dont_crlf_in_style, @style_stack.last_command) if @style_stack.last

  # When only an indent tag was buffered, do not emit a line break.
  if @noprint
    @noprint = false
    return
  end

  @ruby_buf.dump_into(@buffer)
  line = @buffer
  @buffer = TextBuffer.new
  closers = []

  indent_type = line.blank_type
  break_line = line.terpri? && @terprip
  @terprip = true

  # A String on the indent stack is the hanging-indent div to open for this line.
  @out.print @indent_stack.last if @indent_stack.last.is_a?(String) && !indent_type

  line.each do |item|
    case item
    when Aozora2Html::Tag::OnelineIndent
      closers.unshift(item.close_tag)
    when Aozora2Html::Tag::UnEmbedGaiji
      # Bring back the gaiji mark that had been removed.
      @out.print GAIJI_MARK unless item.escaped?
    end
    @out.print item.to_s
  end

  # Finish the line with CRLF.
  if @indent_stack.last.is_a?(String)
    # Hanging indent: the one-line closers are always printed.
    @out.print closers.map(&:to_s).join
    line_end = if indent_type == :inline
                 "\r\n"
               elsif indent_type && break_line
                 "<br />\r\n"
               else
                 "</div>\r\n"
               end
    @out.print line_end
  elsif closers.empty? && break_line
    @out.print "<br />\r\n"
  else
    @out.print closers.map(&:to_s).join
    @out.print "\r\n"
  end
end
537
+
538
+ # 前方参照の発見 Ruby,style重ねがけ等々のため、要素の配列で返す
539
+ #
540
+ # 前方参照は`○○[#「○○」に傍点]`、`吹喋[#「喋」に「ママ」の注記]`といった表記
541
+ #
542
+ # @return [TextBuffer|false]
543
# Look backwards in the buffer for the text a front reference points at.
#
# The result is a list of elements because ruby and styles may be layered on
# the target. The non-empty @ruby_buf is searched when present, otherwise
# @buffer. Matched text is removed from the searched buffer.
#
# @param string [String] the referenced text
# @return [TextBuffer, false, nil] the found elements; false or nil when not found
def search_front_reference(string)
  return false if string.length == 0

  searching_buf = @ruby_buf.present? ? @ruby_buf.to_a : @buffer
  last_string = searching_buf.last
  ends_with = ->(text) { Regexp.new("#{Regexp.quote(text)}$") }

  # Partial match: the last element is only the tail of the reference, so
  # take it off and keep searching for the remainder further back.
  search_remainder = lambda do |tail_text|
    taken = searching_buf.pop
    found = search_front_reference(string.sub(ends_with.call(tail_text), ''))
    if found
      found.push(taken)
      found
    else
      # Not found after all: restore the element that was taken off.
      searching_buf.push(taken)
      false
    end
  end

  case last_string
  when String
    if last_string == ''
      searching_buf.pop
      search_front_reference(string)
    elsif last_string.match?(ends_with.call(string))
      # Full match: cut the reference off the end of the last string.
      searching_buf.pop
      searching_buf.push(last_string.sub(ends_with.call(string), ''))
      TextBuffer.new([string])
    elsif string.match?(ends_with.call(last_string))
      search_remainder.call(last_string)
    end
  when Aozora2Html::Tag::ReferenceMentioned
    inner = last_string.target_string
    if inner == string
      # Full match: the tag itself is the reference.
      searching_buf.pop
      TextBuffer.new([last_string])
    elsif string.match?(ends_with.call(inner))
      search_remainder.call(inner)
    end
  else
    false
  end
end
601
+
602
+ # 発見した前方参照を元に戻す
603
+ #
604
+ # @ruby_bufがあれば@ruby_bufに、なければ@bufferにpushする
605
+ # バッファの最後と各要素が文字列ならconcatし、どちらが文字列でなければ(concatできないので)pushする
606
+ #
607
+ # @return [void]
608
+ #
609
# Put a front reference that was taken out of the buffers back.
#
# Each element goes to @ruby_buf when that buffer is non-empty. Otherwise,
# when @buffer ends with a String, a String element is concatenated onto it
# and a non-String element is pushed to @buffer. In every remaining case the
# element is pushed to @ruby_buf.
# NOTE(review): the last case also targets @ruby_buf rather than @buffer;
# kept exactly as found — confirm whether that is intended.
#
# @param reference [Enumerable] elements to restore
# @return [void]
def recovery_front_reference(reference)
  reference.each do |elt|
    if !@ruby_buf.present? && @buffer.last.is_a?(String)
      if elt.is_a?(String)
        @buffer.last.concat(elt)
      else
        @buffer.push(elt)
      end
    else
      @ruby_buf.push(elt)
    end
  end
end
625
+
626
# Record a gaiji that cannot be embedded and wrap it in an UnEmbedGaiji tag.
#
# The two PAT_GAIJI captures are stored in @images: appended to the existing
# entry for the first capture, or added as a new pair.
#
# @param command [String]
# @return [Aozora2Html::Tag::UnEmbedGaiji]
def escape_gaiji(command)
  kanji, line = command.match(PAT_GAIJI).to_a[1, 2]
  entry = @images.assoc(kanji)
  if entry
    entry.push(line)
  else
    @images.push([kanji, line])
  end
  Aozora2Html::Tag::UnEmbedGaiji.new(self, command)
end
636
+
637
# Handle the character after a gaiji mark.
#
# Unless a command-begin bracket follows, this is not a gaiji and the mark
# itself is returned. Otherwise the command is read and turned into an
# embedded tag (by code, or by Unicode number when EmbedGaiji.use_unicode is
# set) or, failing that, an un-embedded tag.
def dispatch_gaiji
  return GAIJI_MARK if @stream.peek_char(0) != COMMAND_BEGIN

  # Discard the opening bracket.
  read_char
  command, _raw = read_to_nest(COMMAND_END)
  embedded = kuten2png(command)
  return embedded if embedded != command

  unicode = command.match(/U\+([0-9A-F]{4,5})/)
  if unicode && Aozora2Html::Tag::EmbedGaiji.use_unicode
    Aozora2Html::Tag::EmbedGaiji.new(self, nil, nil, command, unicode[1], gaiji_dir: @gaiji_dir)
  else
    # Cannot be embedded.
    escape_gaiji(command)
  end
end
661
+
662
+ # 注記記法の場合分け
663
# Dispatch an annotation command.
#
# Unless IGETA_MARK follows the opening bracket this is not an annotation and
# the bracket itself is returned. Otherwise the command text is read up to
# COMMAND_END and the first matching handler below is applied; the order of
# the checks is significant.
#
# @return [Object, nil] the tag or text to buffer, or nil when the handler already stored its output
def dispatch_aozora_command
  return COMMAND_BEGIN if @stream.peek_char(0) != IGETA_MARK

  # Discard the mark.
  read_char
  command, raw = read_to_nest(COMMAND_END)
  if command.match?(ORIKAESHI_COMMAND)
    apply_burasage(command)
  elsif command.start_with?(OPEN_MARK)
    exec_block_start_command(command)
  elsif command.start_with?(CLOSE_MARK)
    exec_block_end_command(command)
  elsif command.match?(WARICHU_COMMAND)
    apply_warichu(command)
  elsif command.match?(JISAGE_COMMAND)
    apply_jisage(command)
  elsif command.match?(/fig(\d)+_(\d)+\.png/)
    exec_img_command(command, raw)
  elsif command.match?(PAT_REST_NOTES)
    # Avoid attempting complex ruby; fall back to a plain note.
    apply_rest_notes(command)
  elsif command.end_with?(END_MARK)
    exec_inline_end_command(command)
    nil
  elsif command.match?(PAT_REF)
    exec_frontref_command(command)
  elsif command.match?(/1-7-8[2345]/)
    apply_dakuten_katakana(command)
  elsif command.match?(PAT_KAERITEN)
    Aozora2Html::Tag::Kaeriten.new(self, command)
  elsif command.match?(PAT_OKURIGANA)
    # Non-destructive gsub: gsub! returns nil when nothing is removed, which
    # would hand nil to the tag instead of the text.
    Aozora2Html::Tag::Okurigana.new(self, command.gsub(PAT_REMOVE_OKURIGANA, ''))
  elsif command.match?(PAT_CHITSUKI)
    apply_chitsuki(command)
  elsif exec_inline_start_command(command)
    nil
  else
    apply_rest_notes(command)
  end
end
709
+
710
# Start a hanging-indent (burasage) block.
#
# The opening div is not printed here: it is pushed onto @indent_stack as a
# String, with an empty dummy entry on @tag_stack, and @noprint is always set.
#
# @param command [String]
# @return [nil]
def apply_burasage(command)
  if implicit_close(:jisage)
    @terprip = false
    general_output
  end
  @noprint = true # always no print
  command = Utils.convert_japanese_number(command)
  div = if command.match?(TENTSUKI_COMMAND)
          width = command.match(PAT_ORIKAESHI_JISAGE)[1]
          "<div class=\"burasage\" style=\"margin-left: #{width}em; text-indent: -#{width}em;\">"
        else
          first_line, wrapped = command.match(PAT_ORIKAESHI_JISAGE2).to_a[1, 2]
          offset = first_line.to_i - wrapped.to_i
          "<div class=\"burasage\" style=\"margin-left: #{wrapped}em; text-indent: #{offset}em;\">"
        end
  @indent_stack.push(div)
  @tag_stack.push('') # dummy
  nil
end
731
+
732
# Extract the digits that precede JISAGE_COMMAND, after converting Japanese numerals.
#
# @param command [String]
# @return [String] the captured digits (possibly empty)
def jisage_width(command)
  normalized = Utils.convert_japanese_number(command)
  normalized.match(/(\d*)(?:#{JISAGE_COMMAND})/o)[1]
end
735
+
736
# Handle an indent (jisage) command.
#
# - a closing command closes the current :jisage block;
# - a command standing alone on its line (empty buffer, line end next, and
#   not marked as one-line) opens a multi-line block;
# - anything else indents just this line by prepending a one-line tag.
#
# @param command [String]
# @return [Aozora2Html::Tag::MultilineJisage, nil]
def apply_jisage(command)
  if command.match?(MADE_MARK) || command.match?(END_MARK)
    explicit_close(:jisage)
    @indent_stack.pop
    nil
  elsif !command.match?(ONELINE_COMMAND) && (@buffer.length == 0) && (@stream.peek_char(0) == "\r\n")
    @terprip = false
    implicit_close(:jisage)
    # ad hoc: implicit_close may have set @noprint, so reset it
    @noprint = false
    @indent_stack.push(:jisage)
    Aozora2Html::Tag::MultilineJisage.new(self, jisage_width(command))
  else
    @buffer.unshift(Aozora2Html::Tag::OnelineJisage.new(self, jisage_width(command)))
    nil
  end
end
759
+
760
# Open or close a warichu span.
#
# Closing adds PAREN_END_MARK unless one already follows in the stream;
# opening adds PAREN_BEGIN_MARK unless the ruby buffer already ends with one.
#
# @param command [String]
# @return [nil]
def apply_warichu(command)
  if command.match?(END_MARK)
    push_char(PAREN_END_MARK) if @stream.peek_char(0) != PAREN_END_MARK
    push_char('</span>')
  else
    previous = @ruby_buf.last
    # Decide before pushing: the check must see the buffer as it was.
    paren_present = previous.is_a?(String) && previous.end_with?(PAREN_BEGIN_MARK)
    push_char('<span class="warichu">')
    push_char(PAREN_BEGIN_MARK) unless paren_present
  end
  nil
end
779
+
780
# Extract the character count from a chitsuki/jiage command.
#
# @param command [String]
# @return [String] the digits captured by PAT_JI_LEN, or '0' when there are none
def chitsuki_length(command)
  matched = Utils.convert_japanese_number(command).match(PAT_JI_LEN)
  matched ? matched[1] : '0'
end
789
+
790
# Handle a chitsuki / jiage command.
#
# @param string [String] the command text
# @param multiline [Boolean] true for the multi-line form
# @return [Aozora2Html::Tag::MultilineChitsuki, Aozora2Html::Tag::OnelineChitsuki, nil]
#   nil when the command closed a block
def apply_chitsuki(string, multiline: false)
  closing = string.match?(CLOSE_MARK + INDENT_TYPE[:chitsuki] + END_MARK) ||
            string.match?(CLOSE_MARK + JIAGE_COMMAND + END_MARK)
  if closing
    explicit_close(:chitsuki)
    @indent_stack.pop
    return nil
  end

  len = chitsuki_length(string)
  # Single-line form: no block bookkeeping needed.
  return Aozora2Html::Tag::OnelineChitsuki.new(self, len) unless multiline

  # Multi-line form.
  implicit_close(:chitsuki)
  @indent_stack.push(:chitsuki)
  Aozora2Html::Tag::MultilineChitsuki.new(self, len)
end
809
+
810
# Open a multi-line heading block.
#
# The heading kind is :dogyo or :mado when the command contains the
# corresponding mark, otherwise :normal; only the :normal kind clears @terprip.
#
# @param command [String]
# @return [Aozora2Html::Tag::MultilineMidashi]
def apply_midashi(command)
  @indent_stack.push(:midashi)
  midashi_type = if command.match?(DOGYO_MARK)
                   :dogyo
                 elsif command.match?(MADO_MARK)
                   :mado
                 else
                   :normal
                 end
  @terprip = false if midashi_type == :normal
  Aozora2Html::Tag::MultilineMidashi.new(self, command, midashi_type)
end
822
+
823
# Open a multi-line yokogumi block; the command text itself is not used.
#
# @return [Aozora2Html::Tag::MultilineYokogumi]
def apply_yokogumi(_command)
  @indent_stack.push(:yokogumi)
  Aozora2Html::Tag::MultilineYokogumi.new(self)
end
827
+
828
# Open a keigakomi block; the command text itself is not used.
#
# @return [Aozora2Html::Tag::Keigakomi]
def apply_keigakomi(_command)
  @indent_stack.push(:keigakomi)
  Aozora2Html::Tag::Keigakomi.new(self)
end
832
+
833
# Open a multi-line caption block; the command text itself is not used.
#
# @return [Aozora2Html::Tag::MultilineCaption]
def apply_caption(_command)
  @indent_stack.push(:caption)
  Aozora2Html::Tag::MultilineCaption.new(self)
end
837
+
838
# Open a jizume block using the digits that precede the jizume keyword.
#
# @param command [String]
# @return [Aozora2Html::Tag::Jizume]
def apply_jizume(command)
  normalized = Utils.convert_japanese_number(command)
  width = normalized.match(/(\d*)(?:#{INDENT_TYPE[:jizume]})/)[1]
  @indent_stack.push(:jizume)
  Aozora2Html::Tag::Jizume.new(self, width)
end
843
+
844
# Push an opening block tag and append its closing tag to +closing+ in place.
#
# @param tag [#close_tag] the block tag to push
# @param closing [String] accumulator that receives the closing tag text
# @return [String] +closing+ after the append
def push_block_tag(tag, closing)
  push_char(tag)
  closing.concat(tag.close_tag)
end
848
+
849
# Classify a size word: :sho when it contains the "small" character, otherwise :dai.
#
# @param style [String]
# @return [Symbol] :sho or :dai
def detect_style_size(style)
  style.match?('小'.to_sjis) ? :sho : :dai
end
856
+
857
# Open an inline style for +command+.
#
# The command and its closing tag are recorded on @style_stack and the
# opening tag is pushed to the buffers. Commands that are neither a fixed
# keyword nor a character-size command are looked up in COMMAND_TABLE,
# optionally with a direction prefix that adjusts the CSS class.
#
# @param command [String]
# @return [Object, nil] the result of push_char, or nil when the command is unknown
def exec_inline_start_command(command)
  # Record the closing tag, then emit the opening tag.
  open_inline = lambda do |closing, opening|
    @style_stack.push([command, closing])
    push_char(opening)
  end
  # Headings: h<level> wrapping an anchor whose id comes from the heading counter.
  open_midashi = lambda do |level, css_class, step, suppress_break|
    @style_stack.push([command, "</a></h#{level}>"])
    @terprip = false if suppress_break
    push_char("<h#{level} class=\"#{css_class}\"><a class=\"midashi_anchor\" id=\"midashi#{@midashi_counter.generate_id(step)}\">")
  end

  case command
  when CHUUKI_COMMAND
    open_inline.call('</ruby>', '<ruby><rb>')
  when TCY_COMMAND
    open_inline.call('</span>', '<span dir="ltr">')
  when KEIGAKOMI_COMMAND
    open_inline.call('</span>', '<span class="keigakomi">')
  when YOKOGUMI_COMMAND
    open_inline.call('</span>', '<span class="yokogumi">')
  when CAPTION_COMMAND
    open_inline.call('</span>', '<span class="caption">')
  when WARIGAKI_COMMAND
    open_inline.call('</span>', '<span class="warigaki">')
  when OMIDASHI_COMMAND
    open_midashi.call(3, 'o-midashi', 100, true)
  when NAKAMIDASHI_COMMAND
    open_midashi.call(4, 'naka-midashi', 10, true)
  when KOMIDASHI_COMMAND
    open_midashi.call(5, 'ko-midashi', 1, true)
  when DOGYO_OMIDASHI_COMMAND
    open_midashi.call(3, 'dogyo-o-midashi', 100, false)
  when DOGYO_NAKAMIDASHI_COMMAND
    open_midashi.call(4, 'dogyo-naka-midashi', 10, false)
  when DOGYO_KOMIDASHI_COMMAND
    open_midashi.call(5, 'dogyo-ko-midashi', 1, false)
  when MADO_OMIDASHI_COMMAND
    open_midashi.call(3, 'mado-o-midashi', 100, false)
  when MADO_NAKAMIDASHI_COMMAND
    open_midashi.call(4, 'mado-naka-midashi', 10, false)
  when MADO_KOMIDASHI_COMMAND
    open_midashi.call(5, 'mado-ko-midashi', 1, false)
  when PAT_CHARSIZE
    @style_stack.push([command, '</span>'])
    _whole, nest, style = command.match(PAT_CHARSIZE).to_a
    times = Utils.convert_japanese_number(nest).to_i
    daisho = detect_style_size(style)
    html_class = daisho.to_s + times.to_s
    size = Utils.create_font_size(times, daisho)
    push_char("<span class=\"#{html_class}\" style=\"font-size: #{size};\">")
  else
    ## Decoration ##
    key = command
    filter = ->(x) { x }
    if command.match?(PAT_DIRECTION)
      _whole, dir, com = command.match(PAT_DIRECTION).to_a
      # The table is keyed by the command without its direction prefix.
      key = com
      if command.match?(TEN_MARK)
        filter = ->(x) { "#{x}_after" } if [LEFT_MARK, UNDER_MARK].include?(dir)
      elsif command.match?(SEN_MARK)
        filter = ->(x) { x.sub('under', 'over') } if [LEFT_MARK, OVER_MARK].include?(dir)
      end
    end

    # found = [class, tag]
    found = COMMAND_TABLE[key]
    if found
      @style_stack.push([command, "</#{found[1]}>"])
      push_char("<#{found[1]} class=\"#{filter.call(found[0])}\">")
    else
      puts I18n.t(:warn_undefined_command, line_number, key) if $DEBUG
      nil
    end
  end
end
949
+
950
# Handles the closing half of an inline annotation.
#
# After stripping END_MARK from the annotation, one of three things happens:
# * the annotation names the main text: switch to the tail section, close
#   whatever is still open and emit the "after_text" container;
# * the annotation is an inline-ruby note and the innermost open command is
#   CHUUKI_COMMAND: pop it and emit the closing ruby markup;
# * the innermost open command matches the annotation: emit its stored
#   closing tag.
#
# @param command [String] annotation text still containing END_MARK
# @return [Object] the result of the last output call (@out.print or push_char)
# @raise [Aozora2Html::Error] when the annotation does not match the
#   innermost open inline command
def exec_inline_end_command(command)
  target = command.sub(END_MARK, '')

  if target == MAIN_MARK
    # force to finish main_text
    @section = :tail
    ensure_close
    @noprint = true
    return @out.print "</div>\r\n<div class=\"after_text\">\r\n<hr />\r\n"
  end

  innermost = @style_stack.last_command

  if target.match?(CHUUKI_COMMAND) && (innermost == CHUUKI_COMMAND)
    # special inline ruby
    @style_stack.pop
    ruby = target.match(PAT_INLINE_RUBY).to_a[1]
    return push_char('</rb><rp>(</rp><rt>'.to_sjis + ruby + '</rt><rp>)</rp></ruby>'.to_sjis)
  end

  raise Aozora2Html::Error, I18n.t(:invalid_nesting, target, innermost) unless innermost.match?(target)

  push_char(@style_stack.pop[1])
end
969
+
970
# Handles an annotation that opens a block-level (multi-line) style.
#
# OPEN_MARK is stripped from +command+ in place, then the annotation is tested
# against every INDENT_TYPE pattern and PAT_CHARSIZE in a fixed order; every
# pattern that matches pushes its tag through push_block_tag, which receives
# +match_buf+ as an accumulator. From the jizume check onwards, a match first
# pops @indent_stack when an earlier tag was already collected.
#
# @param command [String] annotation text (mutated: OPEN_MARK is removed)
# @return [Object, nil] nil when at least one block tag was opened, otherwise
#   the result of apply_rest_notes for the untouched annotation
def exec_block_start_command(command)
  original_command = command.dup
  command.sub!(/^#{OPEN_MARK}/o, '')
  match_buf = ''.dup
  # Pops the indent stack only when a tag has already been collected.
  drop_previous_indent = -> { @indent_stack.pop unless match_buf.empty? }

  if command.match?(INDENT_TYPE[:jisage])
    push_block_tag(apply_jisage(command), match_buf)
  elsif command.match?(/(#{INDENT_TYPE[:chitsuki]}|#{JIAGE_COMMAND})$/)
    push_block_tag(apply_chitsuki(command, multiline: true), match_buf)
  end

  push_block_tag(apply_midashi(command), match_buf) if command.match?(INDENT_TYPE[:midashi])

  if command.match?(INDENT_TYPE[:jizume])
    drop_previous_indent.call
    push_block_tag(apply_jizume(command), match_buf)
  end

  if command.match?(INDENT_TYPE[:yokogumi])
    drop_previous_indent.call
    push_block_tag(apply_yokogumi(command), match_buf)
  end

  if command.match?(INDENT_TYPE[:keigakomi])
    drop_previous_indent.call
    push_block_tag(apply_keigakomi(command), match_buf)
  end

  if command.match?(INDENT_TYPE[:caption])
    drop_previous_indent.call
    push_block_tag(apply_caption(command), match_buf)
  end

  # futoji and shatai share one shape: a MultilineStyle tag named after the
  # style, plus the style symbol on the indent stack.
  %i[futoji shatai].each do |style_name|
    next unless command.match?(INDENT_TYPE[style_name])

    drop_previous_indent.call
    push_block_tag(Aozora2Html::Tag::MultilineStyle.new(self, style_name.to_s), match_buf)
    @indent_stack.push(style_name)
  end

  charsize = command.match(PAT_CHARSIZE)
  if charsize
    _whole, nest, style = charsize.to_a
    drop_previous_indent.call
    daisho = detect_style_size(style)
    font_size_tag = Aozora2Html::Tag::FontSize.new(self,
                                                   Utils.convert_japanese_number(nest).to_i,
                                                   daisho)
    push_block_tag(font_size_tag, match_buf)
    @indent_stack.push(daisho)
  end

  # Nothing matched: keep the annotation as an editor's note.
  return apply_rest_notes(original_command) if match_buf.empty?

  @tag_stack.push(match_buf)
  nil
end
1047
+
1048
# Handles an annotation that closes a block-level style.
#
# CLOSE_MARK is stripped from +command+ in place. When detect_command_mode
# recognises the annotation, the block is closed explicitly and the matching
# entry is popped from @indent_stack.
#
# @param command [String] annotation text (mutated: CLOSE_MARK is removed)
# @return [Object, nil] nil when an indent entry was popped, otherwise the
#   result of apply_rest_notes for the untouched annotation
def exec_block_end_command(command)
  original_command = command.dup
  command.sub!(/^#{CLOSE_MARK}/o, '')

  closed = false
  mode = detect_command_mode(command)
  if mode
    explicit_close(mode)
    closed = @indent_stack.pop
  end

  return apply_rest_notes(original_command) unless closed

  # Only non-String stack entries reset the @terprip flag.
  @terprip = false unless closed.is_a?(String)
  nil
end
1067
+
1068
# Builds an image tag from an image annotation.
#
# @param command [String] annotation text, used for the editor's-note fallback
# @param raw [String] raw annotation text matched against PAT_IMAGE
# @return [Aozora2Html::Tag::Img, Object] the image tag, or the result of
#   apply_rest_notes when +raw+ does not match PAT_IMAGE
def exec_img_command(command, raw)
  image = raw.match(PAT_IMAGE)
  return apply_rest_notes(command) unless image

  _whole, alt, src, _wh, width, height = image.to_a
  # The CSS class depends on whether the alt text matches PHOTO_COMMAND.
  css_class = alt.match?(PHOTO_COMMAND) ? 'photo' : 'illustration'
  Aozora2Html::Tag::Img.new(self, src, css_class, alt, width, height)
end
1082
+
1083
# Handles an annotation that refers back to text that was already read.
#
# The annotation is split by PAT_FRONTREF into the referenced text and a style
# specification. When the referenced text is found by search_front_reference
# and exec_style can build a tag for it, that tag is returned. If no tag can
# be built, the found text is handed to recovery_front_reference and the
# annotation is kept as an editor's note.
#
# @param command [String] annotation text
# @return [Object] the styled tag, or the result of apply_rest_notes
def exec_frontref_command(command)
  _whole, reference, spec1, spec2 = command.match(PAT_FRONTREF).to_a
  spec = spec1 ? spec1 + spec2 : spec2

  found = reference && search_front_reference(reference)
  if found
    styled = exec_style(found, spec)
    return styled if styled

    recovery_front_reference(found)
  end

  # comment out?
  apply_rest_notes(command)
end
1102
+
1103
+ # 傍記を並べる用
1104
+ #
1105
# Lines up side marks: repeats +bouki+ +times+ times, separated by "&nbsp;".
#
# @param bouki [String] the mark to repeat
# @param times [Integer] number of repetitions
# @return [String] the repeated marks joined with "&nbsp;"
def multiply(bouki, times)
  Array.new(times, bouki).join('&nbsp;')
end
1109
+
1110
+ # rubyタグの再生成(本体はrearrange_ruby)
1111
+ #
1112
+ # complex ruby wrap up utilities -- don't erase! we will use soon ...
1113
+ #
1114
# Regenerates a ruby tag; the actual work is done by
# Aozora2Html::Tag::Ruby.rearrange_ruby, which also receives this parser.
#
# @param targets [Object] passed through unchanged
# @param upper_ruby [Object] passed through unchanged
# @param under_ruby [Object] passed through unchanged
# @return [Object] whatever Tag::Ruby.rearrange_ruby returns
def rearrange_ruby_tag(targets, upper_ruby, under_ruby)
  ruby_tag = Aozora2Html::Tag::Ruby
  ruby_tag.rearrange_ruby(self, targets, upper_ruby, under_ruby)
end
1117
+
1118
# Builds the tag that applies the style named by +command+ to +targets+.
#
# The command is tried against each known pattern in order; the first match
# decides which tag is built. Commands that match nothing are finally looked
# up in COMMAND_TABLE, after an optional direction prefix (PAT_DIRECTION) has
# been split off and turned into a CSS-class filter.
#
# @param targets [Array] the referenced text/tags the style applies to
# @param command [String] style specification taken from the annotation
# @return [Object, nil] the generated tag (or the kuten2png result when that
#   conversion changed the command); nil when the command is unknown
# @raise [Aozora2Html::Error] when a second under-ruby is requested for a
#   ruby tag that already has one
def exec_style(targets, command)
  try_kuten = kuten2png(command)
  if try_kuten != command
    try_kuten
  elsif command.match?(TCY_COMMAND)
    Aozora2Html::Tag::Dir.new(self, targets)
  elsif command.match?(YOKOGUMI_COMMAND)
    Aozora2Html::Tag::InlineYokogumi.new(self, targets)
  elsif command.match?(KEIGAKOMI_COMMAND)
    Aozora2Html::Tag::InlineKeigakomi.new(self, targets)
  elsif command.match?(CAPTION_COMMAND)
    Aozora2Html::Tag::InlineCaption.new(self, targets)
  elsif command.match?(KAERITEN_COMMAND)
    Aozora2Html::Tag::Kaeriten.new(self, targets)
  elsif command.match?(KUNTEN_OKURIGANA_COMMAND)
    Aozora2Html::Tag::Okurigana.new(self, targets)
  elsif command.match?(MIDASHI_COMMAND)
    midashi_type = :normal
    if command.match?(DOGYO_MARK)
      midashi_type = :dogyo
    elsif command.match?(MADO_MARK)
      midashi_type = :mado
    else
      # Only a normal heading resets the @terprip flag.
      @terprip = false
    end
    Aozora2Html::Tag::Midashi.new(self, targets, command, midashi_type)
  elsif command.match?(PAT_CHARSIZE)
    _whole, nest, style = command.match(PAT_CHARSIZE).to_a
    Aozora2Html::Tag::InlineFontSize.new(self, targets,
                                         Utils.convert_japanese_number(nest).to_i,
                                         detect_style_size(style))
  elsif command.match?(PAT_RUBY_DIR)
    _whole, _dir, under = command.match(PAT_RUBY_DIR).to_a
    if (targets.length == 1) && targets[0].is_a?(Aozora2Html::Tag::Ruby)
      # The target already is a ruby tag: attach the under-ruby to it.
      tag = targets[0]
      raise Aozora2Html::Error, I18n.t(:dont_allow_triple_ruby) unless tag.under_ruby == ''

      tag.under_ruby = under
      tag
    else
      rearrange_ruby_tag(targets, '', under)
    end
  elsif command.match?(PAT_CHUUKI)
    rearrange_ruby_tag(targets, PAT_CHUUKI.match(command).to_a[1], '')
  elsif command.match?(PAT_BOUKI)
    # One mark per character of the target text. Array#to_s is an alias of
    # #inspect, so its length would also count brackets, quotes and escape
    # sequences; join gives the concatenated text instead.
    rearrange_ruby_tag(targets, multiply(PAT_BOUKI.match(command).to_a[1], targets.join.length), '')
  else
    ## direction fix! ##
    filter = ->(x) { x }
    if command.match?(PAT_DIRECTION)
      _whole, dir, com = command.match(PAT_DIRECTION).to_a
      # renew command
      command = com
      if command.match?(TEN_MARK)
        case dir
        when LEFT_MARK, UNDER_MARK
          filter = ->(x) { "#{x}_after" }
        end
      elsif command.match?(SEN_MARK)
        case dir
        when LEFT_MARK, OVER_MARK
          filter = ->(x) { x.sub('under', 'over') }
        end
      end
    end

    found = COMMAND_TABLE[command]
    # found = [class, tag]
    if found
      Aozora2Html::Tag::Decorate.new(self, targets, filter.call(found[0]), found[1])
    end
  end
end
1191
+
1192
# Builds a DakutenKatakana tag for an annotation containing a code of the
# form 1-7-82 .. 1-7-85.
#
# The final digit selects the entry of DAKUTEN_KATAKANA_TABLE that is then
# searched for in the text already read.
#
# @param command [String] annotation text
# @return [Aozora2Html::Tag::DakutenKatakana, Object] the tag, or the result
#   of apply_rest_notes when the referenced text is not found
def apply_dakuten_katakana(command)
  n = command[/1-7-8([2345])/, 1]
  found = search_front_reference(DAKUTEN_KATAKANA_TABLE[n])
  return apply_rest_notes(command) unless found

  Aozora2Html::Tag::DakutenKatakana.new(self, n, found.join, gaiji_dir: @gaiji_dir)
end
1202
+
1203
+ # くの字点の処理
1204
+ #
1205
+ # くの字点は現状そのまま出力するのでフッタの「表記について」で出力するかどうかのフラグ処理だけ行う
1206
# Handles the "kunoji" iteration mark.
#
# The mark itself is output as-is; this method only peeks at the following
# characters and records in @chuuki_table which variant was seen, so that the
# notation notes in the footer can mention it.
#
# @return [true, nil] true when a flag was set, nil otherwise
def assign_kunoji
  case @stream.peek_char(0)
  when NOJI
    @chuuki_table[:kunoji] = true
  when DAKUTEN
    @chuuki_table[:dakutenkunoji] = true if @stream.peek_char(1) == NOJI
  end
end
1217
+
1218
# Keeps an annotation as an editor's note and records in @chuuki_table that
# such a note was used.
#
# @param command [String] annotation text
# @return [Aozora2Html::Tag::EditorNote] the note tag
def apply_rest_notes(command)
  @chuuki_table.store(:chuki, true)
  Aozora2Html::Tag::EditorNote.new(self, command)
end
1222
+
1223
+ # |が来たときは文字種を無視してruby_bufを守らなきゃいけない
1224
# Reads a ruby annotation up to RUBY_END_MARK and appends the resulting ruby
# markup to @buffer. The protection flag on @ruby_buf is cleared first.
#
# @return [String, nil] the begin and end marks joined when the annotation is
#   empty (an escaped ruby character), nil otherwise
def apply_ruby
  @ruby_buf.protected = nil
  ruby, = read_to_nest(RUBY_END_MARK)
  # escaped ruby character
  return RUBY_BEGIN_MARK + RUBY_END_MARK if ruby.empty?

  @buffer.concat(@ruby_buf.create_ruby(self, ruby))

  nil
end
1236
+
1237
+ # parse_bodyのフッタ版
1238
# Footer counterpart of parse_body: reads one character, dispatches special
# marks to their handlers, then outputs or buffers the result.
#
# Throws :terminate when the end character is read.
def parse_tail
  char = read_char
  # Text produced by read_accent is not run through the illegal-char check.
  skip_check = false

  case char
  when ACCENT_BEGIN
    skip_check = true
    char = read_accent
  when @endchar
    throw :terminate
  when GAIJI_MARK
    char = dispatch_gaiji
  when COMMAND_BEGIN
    char = dispatch_aozora_command
  when KU
    assign_kunoji
  when RUBY_BEGIN_MARK
    char = apply_ruby
  end

  case char
  when "\r\n"
    tail_output
  when RUBY_PREFIX
    @ruby_buf.dump_into(@buffer)
    @ruby_buf.protected = true
  when nil
    # noop
  else
    Utils.illegal_char_check(char, line_number) unless skip_check
    push_chars(escape_special_chars(char))
  end
end
1272
+
1273
+ # general_outputのフッタ版
1274
# Footer counterpart of general_output: flushes the buffered line to @out.
#
# The contact address and the library's name-plus-URL text are turned into
# links. A "<br />" is added before the line break unless the line already
# ends in a block-level tag or consists of a single tag.
def tail_output
  @ruby_buf.dump_into(@buffer)
  line = @buffer.join
  @buffer = TextBuffer.new

  line = line.gsub('info@aozora.gr.jp', '<a href="mailto: info@aozora.gr.jp">info@aozora.gr.jp</a>')
  line = line.gsub('青空文庫(http://www.aozora.gr.jp/)'.to_sjis) do
    "<a href=\"http://www.aozora.gr.jp/\">#{Regexp.last_match(0)}</a>"
  end

  self_terminated = line.match?(%r{(<br />$|</p>$|</h\d>$|<div.*>$|</div>$|^<[^>]*>$)})
  line_end = self_terminated ? "\r\n" : "<br />\r\n"
  @out.print line, line_end
end
1286
+
1287
+ # `●表記について`で使用した注記等を出力する
1288
# Writes the notation-notes section of the footer: one list item for each
# kind of annotation flagged in @chuuki_table, followed by a table of the
# entries collected in @images, if there are any.
def hyoki
  # <br /> times fix
  @out.print "<br />\r\n</div>\r\n<div class=\"notation_notes\">\r\n<hr />\r\n<br />\r\n●表記について<br />\r\n<ul>\r\n".to_sjis
  @out.print "\t<li>このファイルは W3C 勧告 XHTML1.1 にそった形式で作成されています。</li>\r\n".to_sjis
  @out.print "\t<li>[#…]は、入力者による注を表す記号です。</li>\r\n".to_sjis if @chuuki_table[:chuki]

  kunoji = @chuuki_table[:kunoji]
  dakuten_kunoji = @chuuki_table[:dakutenkunoji]
  if kunoji && dakuten_kunoji
    @out.printf("\t<li>「くの字点」は「%s」で、「濁点付きくの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + NOJI, KU + DAKUTEN + NOJI)
  elsif kunoji
    @out.printf("\t<li>「くの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + NOJI)
  elsif dakuten_kunoji
    @out.printf("\t<li>「濁点付きくの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + DAKUTEN + NOJI)
  end

  if @chuuki_table[:newjis] && !Aozora2Html::Tag::EmbedGaiji.use_jisx0213
    @out.print "\t<li>「くの字点」をのぞくJIS X 0213にある文字は、画像化して埋め込みました。</li>\r\n".to_sjis
  end
  if @chuuki_table[:accent] && !Aozora2Html::Tag::Accent.use_jisx0213
    @out.print "\t<li>アクセント符号付きラテン文字は、画像化して埋め込みました。</li>\r\n".to_sjis
  end

  if @images[0]
    @out.print "\t<li>この作品には、JIS X 0213にない、以下の文字が用いられています。(数字は、底本中の出現「ページ-行」数。)これらの文字は本文内では「※[#…]」の形で示しました。</li>\r\n</ul>\r\n<br />\r\n\t\t<table class=\"gaiji_list\">\r\n".to_sjis
    @images.each do |cell|
      gaiji, *locations = cell
      joined_locations = locations.join('、'.to_sjis)
      row_head = "\t\t\t<tr>\r\n\t\t\t\t<td>\r\n\t\t\t\t#{gaiji}\r\n\t\t\t\t</td>\r\n\t\t\t\t<td>&nbsp;&nbsp;</td>\r\n\t\t\t\t<td>\r\n#{joined_locations}\t\t\t\t</td>\r\n\t\t\t\t<!--\r\n\t\t\t\t<td>\r\n\t\t\t\t"
      row_tail = "<img src=\"../../../gaiji/others/xxxx.png\" alt=\"#{gaiji}\" width=32 height=32 />\r\n\t\t\t\t</td>\r\n\t\t\t\t-->\r\n\t\t\t</tr>\r\n".to_sjis
      @out.print row_head + '  '.to_sjis + row_tail
    end
    @out.print "\t\t</table>\r\n".to_sjis
  else
    # Elements other than <li> are not allowed inside <ul>, so the list is
    # closed here.
    @out.print "</ul>\r\n"
  end
  @out.print "</div>\r\n"
end
1323
+
1324
+ # Original Aozora2Html#push_chars does not convert "'" into '&#39;'; it's the old behavior of CGI.escapeHTML().
1325
# Escapes the HTML special characters &, ", < and > in a string.
# The single quote is deliberately left untouched.
#
# @param char [String, Object] text to escape
# @return [String, Object] the escaped copy for a String; any other object
#   is returned unchanged
def escape_special_chars(char)
  return char unless char.is_a?(String)

  entities = { '&' => '&amp;', '"' => '&quot;', '<' => '&lt;', '>' => '&gt;' }
  char.gsub(/[&"<>]/) { |special| entities[special] }
end
6
1332
  end