aozora2html 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rubocop.yml +19 -0
  3. data/.github/workflows/ruby.yml +4 -1
  4. data/.rubocop.yml +36 -152
  5. data/.rubocop_todo.yml +7 -0
  6. data/CHANGELOG.md +26 -0
  7. data/Gemfile +2 -0
  8. data/Guardfile +3 -1
  9. data/HACKING.md +45 -0
  10. data/README.md +14 -6
  11. data/Rakefile +12 -5
  12. data/aozora2html.gemspec +24 -22
  13. data/bin/aozora2html +21 -19
  14. data/lib/aozora2html/accent_parser.rb +62 -54
  15. data/lib/aozora2html/error.rb +5 -4
  16. data/lib/aozora2html/header.rb +20 -18
  17. data/lib/aozora2html/i18n.rb +40 -20
  18. data/lib/aozora2html/ruby_buffer.rb +63 -28
  19. data/lib/aozora2html/string_refinements.rb +36 -0
  20. data/lib/aozora2html/style_stack.rb +6 -0
  21. data/lib/aozora2html/tag/accent.rb +10 -12
  22. data/lib/aozora2html/tag/block.rb +11 -9
  23. data/lib/aozora2html/tag/chitsuki.rb +6 -2
  24. data/lib/aozora2html/tag/dakuten_katakana.rb +10 -8
  25. data/lib/aozora2html/tag/decorate.rb +4 -3
  26. data/lib/aozora2html/tag/dir.rb +4 -2
  27. data/lib/aozora2html/tag/editor_note.rb +7 -4
  28. data/lib/aozora2html/tag/embed_gaiji.rb +15 -11
  29. data/lib/aozora2html/tag/font_size.rb +5 -2
  30. data/lib/aozora2html/tag/gaiji.rb +4 -3
  31. data/lib/aozora2html/tag/img.rb +4 -4
  32. data/lib/aozora2html/tag/indent.rb +3 -3
  33. data/lib/aozora2html/tag/inline.rb +10 -7
  34. data/lib/aozora2html/tag/inline_caption.rb +4 -2
  35. data/lib/aozora2html/tag/inline_font_size.rb +4 -3
  36. data/lib/aozora2html/tag/inline_keigakomi.rb +4 -2
  37. data/lib/aozora2html/tag/inline_yokogumi.rb +4 -3
  38. data/lib/aozora2html/tag/jisage.rb +3 -1
  39. data/lib/aozora2html/tag/jizume.rb +3 -0
  40. data/lib/aozora2html/tag/kaeriten.rb +4 -2
  41. data/lib/aozora2html/tag/keigakomi.rb +15 -9
  42. data/lib/aozora2html/tag/kunten.rb +4 -4
  43. data/lib/aozora2html/tag/midashi.rb +3 -1
  44. data/lib/aozora2html/tag/multiline.rb +3 -0
  45. data/lib/aozora2html/tag/multiline_caption.rb +6 -8
  46. data/lib/aozora2html/tag/multiline_chitsuki.rb +3 -1
  47. data/lib/aozora2html/tag/multiline_jisage.rb +3 -1
  48. data/lib/aozora2html/tag/multiline_midashi.rb +6 -3
  49. data/lib/aozora2html/tag/multiline_style.rb +5 -3
  50. data/lib/aozora2html/tag/multiline_yokogumi.rb +6 -9
  51. data/lib/aozora2html/tag/okurigana.rb +4 -2
  52. data/lib/aozora2html/tag/oneline_chitsuki.rb +3 -2
  53. data/lib/aozora2html/tag/oneline_indent.rb +8 -1
  54. data/lib/aozora2html/tag/oneline_jisage.rb +3 -0
  55. data/lib/aozora2html/tag/reference_mentioned.rb +22 -21
  56. data/lib/aozora2html/tag/ruby.rb +174 -70
  57. data/lib/aozora2html/tag/un_embed_gaiji.rb +8 -2
  58. data/lib/aozora2html/tag.rb +40 -38
  59. data/lib/aozora2html/tag_parser.rb +23 -16
  60. data/lib/aozora2html/text_buffer.rb +50 -0
  61. data/lib/aozora2html/utils.rb +113 -50
  62. data/lib/aozora2html/version.rb +3 -1
  63. data/lib/aozora2html/yaml_loader.rb +8 -2
  64. data/lib/aozora2html/zip.rb +4 -0
  65. data/lib/aozora2html.rb +1358 -3
  66. data/lib/extensions.rb +2 -34
  67. data/lib/jstream.rb +96 -25
  68. data/sample/chukiichiran_kinyurei.html +15 -2
  69. data/sample/chukiichiran_kinyurei.txt +15 -2
  70. data/test/test_aozora2html.rb +137 -148
  71. data/test/test_aozora_accent_parser.rb +26 -9
  72. data/test/test_command_parse.rb +25 -22
  73. data/test/test_compat.rb +3 -4
  74. data/test/test_dakuten_katakana_tag.rb +10 -12
  75. data/test/test_decorate_tag.rb +9 -6
  76. data/test/test_dir_tag.rb +9 -6
  77. data/test/test_editor_note_tag.rb +8 -5
  78. data/test/test_exception.rb +10 -8
  79. data/test/test_font_size_tag.rb +16 -13
  80. data/test/test_gaiji_tag.rb +15 -14
  81. data/test/test_header.rb +25 -40
  82. data/test/test_helper.rb +3 -1
  83. data/test/test_i18n.rb +22 -6
  84. data/test/test_img_tag.rb +9 -5
  85. data/test/test_inline_caption_tag.rb +9 -6
  86. data/test/test_inline_font_size_tag.rb +13 -10
  87. data/test/test_inline_keigakomi_tag.rb +9 -6
  88. data/test/test_inline_yokogumi_tag.rb +9 -6
  89. data/test/test_jizume_tag.rb +9 -7
  90. data/test/test_jstream.rb +33 -30
  91. data/test/test_kaeriten_tag.rb +9 -6
  92. data/test/test_keigakomi_tag.rb +11 -9
  93. data/test/test_midashi_tag.rb +15 -14
  94. data/test/test_multiline_caption_tag.rb +7 -5
  95. data/test/test_multiline_midashi_tag.rb +24 -25
  96. data/test/test_multiline_style_tag.rb +9 -7
  97. data/test/test_multiline_yokogumi_tag.rb +7 -5
  98. data/test/test_okurigana_tag.rb +9 -6
  99. data/test/test_ruby_parse.rb +14 -14
  100. data/test/test_ruby_tag.rb +9 -6
  101. data/test/test_tag_parser.rb +28 -26
  102. metadata +60 -14
  103. data/.travis.yml +0 -12
  104. data/lib/t2hs.rb +0 -1607
data/lib/aozora2html.rb CHANGED
@@ -1,6 +1,1361 @@
1
- require "aozora2html/version"
2
- require 't2hs'
1
+ require_relative 'aozora2html/version'
2
+ require_relative 'extensions'
3
+ require_relative 'aozora2html/error'
4
+ require_relative 'aozora2html/i18n'
5
+ require_relative 'jstream'
6
+ require_relative 'aozora2html/tag'
7
+ require_relative 'aozora2html/tag_parser'
8
+ require_relative 'aozora2html/accent_parser'
9
+ require_relative 'aozora2html/style_stack'
10
+ require_relative 'aozora2html/header'
11
+ require_relative 'aozora2html/ruby_buffer'
12
+ require_relative 'aozora2html/text_buffer'
13
+ require_relative 'aozora2html/yaml_loader'
14
+ require_relative 'aozora2html/utils'
15
+ require_relative 'aozora2html/string_refinements'
3
16
 
4
- ## already defined in t2hs.rb
17
+ # 青空文庫形式のテキストファイルを html に整形する ruby スクリプト
18
+ # 変換器本体
5
19
  class Aozora2Html
20
+ # 全角バックスラッシュが出せないから直打ち
21
+ KU = ['18e5'].pack('h*').force_encoding('shift_jis')
22
+ NOJI = ['18f5'].pack('h*').force_encoding('shift_jis')
23
+ DAKUTEN = ['18d8'].pack('h*').force_encoding('shift_jis')
24
+
25
+ using StringRefinements
26
+
27
+ GAIJI_MARK = '※'.to_sjis
28
+ IGETA_MARK = '#'.to_sjis
29
+ RUBY_BEGIN_MARK = '《'.to_sjis
30
+ RUBY_END_MARK = '》'.to_sjis
31
+ PAREN_BEGIN_MARK = '('.to_sjis
32
+ PAREN_END_MARK = ')'.to_sjis
33
+ SIZE_SMALL = '小'.to_sjis
34
+ SIZE_MIDDLE = '中'.to_sjis
35
+ SIZE_LARGE = '大'.to_sjis
36
+ TEIHON_MARK = '底本:'.to_sjis
37
+ COMMAND_BEGIN = '['.to_sjis
38
+ COMMAND_END = ']'.to_sjis
39
+ ACCENT_BEGIN = '〔'.to_sjis
40
+ ACCENT_END = '〕'.to_sjis
41
+ AOZORABUNKO = '青空文庫'.to_sjis
42
+ # PAT_EDITOR = /[校訂|編|編集|編集校訂|校訂編集]$/
43
+ PAT_EDITOR = '(校訂|編|編集)$'.to_sjis
44
+ PAT_HENYAKU = '編訳$'.to_sjis
45
+ PAT_TRANSLATOR = '訳$'.to_sjis
46
+ RUBY_PREFIX = '|'.to_sjis
47
+ PAT_RUBY = /#{"《.*?》".to_sjis}/.freeze
48
+ PAT_DIRECTION = '(右|左|上|下)に(.*)'.to_sjis
49
+ PAT_REF = '^「.+」'.to_sjis
50
+ CHUUKI_COMMAND = '注記付き'.to_sjis
51
+ TCY_COMMAND = '縦中横'.to_sjis
52
+ KEIGAKOMI_COMMAND = '罫囲み'.to_sjis
53
+ YOKOGUMI_COMMAND = '横組み'.to_sjis
54
+ CAPTION_COMMAND = 'キャプション'.to_sjis
55
+ WARIGAKI_COMMAND = '割書'.to_sjis
56
+ KAERITEN_COMMAND = '返り点'.to_sjis
57
+ KUNTEN_OKURIGANA_COMMAND = '訓点送り仮名'.to_sjis
58
+ MIDASHI_COMMAND = '見出し'.to_sjis
59
+ OMIDASHI_COMMAND = '大見出し'.to_sjis
60
+ NAKAMIDASHI_COMMAND = '中見出し'.to_sjis
61
+ KOMIDASHI_COMMAND = '小見出し'.to_sjis
62
+ DOGYO_OMIDASHI_COMMAND = '同行大見出し'.to_sjis
63
+ DOGYO_NAKAMIDASHI_COMMAND = '同行中見出し'.to_sjis
64
+ DOGYO_KOMIDASHI_COMMAND = '同行小見出し'.to_sjis
65
+ MADO_OMIDASHI_COMMAND = '窓大見出し'.to_sjis
66
+ MADO_NAKAMIDASHI_COMMAND = '窓中見出し'.to_sjis
67
+ MADO_KOMIDASHI_COMMAND = '窓小見出し'.to_sjis
68
+ LEFT_MARK = '左'.to_sjis
69
+ UNDER_MARK = '下'.to_sjis
70
+ OVER_MARK = '上'.to_sjis
71
+ MAIN_MARK = '本文'.to_sjis
72
+ END_MARK = '終わり'.to_sjis
73
+ TEN_MARK = '点'.to_sjis
74
+ SEN_MARK = '線'.to_sjis
75
+ OPEN_MARK = 'ここから'.to_sjis
76
+ CLOSE_MARK = 'ここで'.to_sjis
77
+ MADE_MARK = 'まで'.to_sjis
78
+ DOGYO_MARK = '同行'.to_sjis
79
+ MADO_MARK = '窓'.to_sjis
80
+ JIAGE_COMMAND = '字上げ'.to_sjis
81
+ JISAGE_COMMAND = '字下げ'.to_sjis
82
+ PHOTO_COMMAND = '写真'.to_sjis
83
+ ORIKAESHI_COMMAND = '折り返して'.to_sjis
84
+ ONELINE_COMMAND = 'この行'.to_sjis
85
+ NON_0213_GAIJI = '非0213外字'.to_sjis
86
+ WARICHU_COMMAND = '割り注'.to_sjis
87
+ TENTSUKI_COMMAND = '天付き'.to_sjis
88
+ PAT_REST_NOTES = '(左|下)に「(.*)」の(ルビ|注記|傍記)'.to_sjis
89
+ PAT_KUTEN = /#{"「※」[は|の]".to_sjis}/.freeze
90
+ PAT_KUTEN_DUAL = '※.*※'.to_sjis
91
+ PAT_GAIJI = '(?:#)(.*)(?:、)(.*)'.to_sjis
92
+ PAT_KAERITEN = '^([一二三四五六七八九十レ上中下甲乙丙丁天地人]+)$'.to_sjis
93
+ PAT_OKURIGANA = '^((.+))$'.to_sjis
94
+ PAT_REMOVE_OKURIGANA = /#{"[()]".to_sjis}/.freeze
95
+ PAT_CHITSUKI = /#{"(地付き|字上げ)(終わり)*$".to_sjis}/.freeze
96
+ PAT_ORIKAESHI_JISAGE = '折り返して(\\d*)字下げ'.to_sjis
97
+ PAT_ORIKAESHI_JISAGE2 = '(\\d*)字下げ、折り返して(\\d*)字下げ'.to_sjis
98
+ PAT_JI_LEN = '([0-9]+)字'.to_sjis
99
+ PAT_INLINE_RUBY = '「(.*)」の注記付き'.to_sjis
100
+ PAT_IMAGE = '(.*)((fig.+\\.png)(、横([0-9]+)×縦([0-9]+))*)入る'.to_sjis
101
+ PAT_FRONTREF = '「([^「」]*(?:「.+」)*[^「」]*)」[にはの](「.+」の)*(.+)'.to_sjis
102
+ PAT_RUBY_DIR = '(左|下)に「([^」]*)」の(ルビ|注記)'.to_sjis
103
+ PAT_CHUUKI = /#{"「(.+?)」の注記".to_sjis}/.freeze
104
+ PAT_BOUKI = /#{"「(.)」の傍記".to_sjis}/.freeze
105
+ PAT_CHARSIZE = /#{"(.*)段階(..)な文字".to_sjis}/.freeze
106
+
107
+ REGEX_HIRAGANA = Regexp.new('[ぁ-んゝゞ]'.to_sjis)
108
+ REGEX_KATAKANA = Regexp.new('[ァ-ンーヽヾヴ]'.to_sjis)
109
+ REGEX_ZENKAKU = Regexp.new('[0-9A-Za-zΑ-Ωα-ωА-Яа-я−&’,.]'.to_sjis)
110
+ REGEX_HANKAKU = Regexp.new("[A-Za-z0-9#\\-\\&'\\,]".to_sjis)
111
+ REGEX_KANJI = Regexp.new('[亜-熙々※仝〆〇ヶ]'.to_sjis)
112
+
113
+ DYNAMIC_CONTENTS = "<div id=\"card\">\r\n<hr />\r\n<br />\r\n<a href=\"JavaScript:goLibCard();\" id=\"goAZLibCard\">●図書カード</a><script type=\"text/javascript\" src=\"../../contents.js\"></script>\r\n<script type=\"text/javascript\" src=\"../../golibcard.js\"></script>\r\n</div>".to_sjis
114
+
115
+ # KUNOJI = ["18e518f5"].pack("h*")
116
+ # utf8 ["fecbf8fecbcb"].pack("h*")
117
+ # DAKUTENKUNOJI = ["18e518d818f5"].pack("h*")
118
+ # utf8 ["fecbf82e083bfecbcb"].pack("h*")
119
+
120
+ loader = Aozora2Html::YamlLoader.new(File.dirname(__FILE__))
121
+ ACCENT_TABLE = loader.load('../yml/accent_table.yml')
122
+
123
+ # [class, tag]
124
+ COMMAND_TABLE = loader.load('../yml/command_table.yml')
125
+ JIS2UCS = loader.load('../yml/jis2ucs.yml')
126
+
127
+ INDENT_TYPE = {
128
+ jisage: '字下げ'.to_sjis,
129
+ chitsuki: '地付き'.to_sjis,
130
+ midashi: '見出し'.to_sjis,
131
+ jizume: '字詰め'.to_sjis,
132
+ yokogumi: '横組み'.to_sjis,
133
+ keigakomi: '罫囲み'.to_sjis,
134
+ caption: 'キャプション'.to_sjis,
135
+ futoji: '太字'.to_sjis,
136
+ shatai: '斜体'.to_sjis,
137
+ dai: '大きな文字'.to_sjis,
138
+ sho: '小さな文字'.to_sjis
139
+ }.freeze
140
+
141
+ DAKUTEN_KATAKANA_TABLE = {
142
+ '2' => 'ワ゛'.to_sjis,
143
+ '3' => 'ヰ゛'.to_sjis,
144
+ '4' => 'ヱ゛'.to_sjis,
145
+ '5' => 'ヲ゛'.to_sjis
146
+ }.freeze
147
+
148
# Sets up a converter that reads from +input+ and writes to +output+.
#
# @param input [#read, String] an object responding to #read, or a path that is opened as 'rb:Shift_JIS'
# @param output [#print, String] an object responding to #print, or a path that is opened for writing
# @param gaiji_dir [String, nil] handed on to gaiji tags and nested parsers; defaults to '../../../gaiji/'
# @param css_files [Array<String>, nil] handed to the Header; defaults to ['../../aozora.css']
def initialize(input, output, gaiji_dir: nil, css_files: nil)
  source = input.respond_to?(:read) ? input : File.open(input, 'rb:Shift_JIS')
  @stream = Jstream.new(source)
  @out = output.respond_to?(:print) ? output : File.open(output, 'w')
  @gaiji_dir = gaiji_dir || '../../../gaiji/'
  @css_files = css_files || ['../../aozora.css']

  @buffer = TextBuffer.new
  @ruby_buf = RubyBuffer.new
  # Section currently being processed (:head, :head_end, :chuuki, :chuuki_in, :body, :tail)
  @section = :head
  # Collects the header lines
  @header = Aozora2Html::Header.new(css_files: @css_files)
  # Stack of open inline styles
  @style_stack = StyleStack.new
  # Remembers which notes have to be emitted at the end
  @chuuki_table = {}
  # Gaiji images that were used
  @images = []
  # Normally symbols; for hanging indents the div tag string itself is stored
  @indent_stack = []
  @tag_stack = []
  # Heading counter; the increment depends on the heading level
  @midashi_id = 0
  # Line-break control flag
  @terprip = true
  # Character that ends parsing; AccentParser and TagParser use a different one
  @endchar = :eof
  # When set, nothing is printed at the next end of line
  @noprint = nil
end
176
+
177
# Line number reported by the input stream (@stream.line).
def line_number
  current_line = @stream.line
  current_line
end
180
+
181
# True when the inline style stack is empty.
# Checking that no inline tag is open is sufficient here.
def block_allowed_context?
  open_styles = @style_stack
  open_styles.empty?
end
185
+
186
# Runs the whole conversion.
#
# `parse` loops until something throws :terminate (end of input). A
# conversion error (Aozora2Html::Error) is printed together with the current
# line number and terminates the process with exit status 2. After a normal
# termination the tail is emitted, the document is finalized and both
# streams are closed.
#
# @raise [StandardError] any other error is re-raised after printing the
#   line number at which it happened
def process
  catch(:terminate) do
    parse
  rescue Aozora2Html::Error => e
    # The rescue clause already guarantees the class of +e+, so no further
    # type check is needed before exiting.
    puts e.message(line_number)
    exit(2)
  end
  tail_output # final call
  finalize
  close
rescue StandardError => e
  puts "ERROR: line: #{line_number}"
  raise e
end
206
+
207
# Advances the heading counter and returns its new value.
#
# @param size [Integer, String] an explicit increment, or a string that is
#   matched against SIZE_SMALL (+1), SIZE_MIDDLE (+10) and SIZE_LARGE (+100)
# @return [Integer] the updated counter
# @raise [Aozora2Html::Error] when a non-Integer size matches none of the marks
def new_midashi_id(size)
  return @midashi_id += size if size.is_a?(Integer)

  step = case size
         when /#{SIZE_SMALL}/o then 1
         when /#{SIZE_MIDDLE}/o then 10
         when /#{SIZE_LARGE}/o then 100
         else raise Aozora2Html::Error, I18n.t(:undefined_header)
         end
  @midashi_id += step
end
225
+
226
# Tries to turn a gaiji description into an embedded gaiji tag.
#
# The description is searched for a code of the form "P-RR-CC" (P is 1 or 2).
# If one is found, and the text neither matches NON_0213_GAIJI nor
# PAT_KUTEN_DUAL, an EmbedGaiji tag is built and @chuuki_table[:newjis] is
# set. Otherwise the argument is returned unchanged.
#
# @param substring [String] the note text
# @return [Aozora2Html::Tag::EmbedGaiji, String] the tag, or +substring+ itself
def kuten2png(substring)
  desc = substring.gsub(PAT_KUTEN, '')
  matched = desc.match(/[12]-\d{1,2}-\d{1,2}/)
  return substring unless matched && !desc.match?(NON_0213_GAIJI) && !desc.match?(PAT_KUTEN_DUAL)

  @chuuki_table[:newjis] = true
  # Convert to integers in base 10 up front: handing the raw strings to
  # sprintf('%d') makes "08"/"09" fail as invalid octal literals.
  codes = matched[0].split('-').map { |part| part.to_i(10) }
  folder = sprintf('%1d-%02d', codes[0], codes[1])
  code = sprintf('%1d-%02d-%02d', *codes)
  # gsub instead of gsub!: gsub! returns nil when the description contains
  # no IGETA_MARK, which would hand nil to the tag as its name.
  name = desc.gsub(IGETA_MARK, '')
  Aozora2Html::Tag::EmbedGaiji.new(self, folder, code, name, gaiji_dir: @gaiji_dir)
end
239
+
240
# Extracts the mode symbol from a command string.
#
# A command matching "<chitsuki label><END_MARK>" or "<JIAGE_COMMAND><END_MARK>"
# is :chitsuki; otherwise the first INDENT_TYPE entry whose label matches wins.
#
# @param command [String]
# @return [Symbol, nil] the mode, or nil when nothing matches
def detect_command_mode(command)
  chitsuki_closers = [INDENT_TYPE[:chitsuki] + END_MARK, JIAGE_COMMAND + END_MARK]
  return :chitsuki if chitsuki_closers.any? { |pattern| command.match?(pattern) }

  INDENT_TYPE.each_key.find { |mode| command.match?(INDENT_TYPE[mode]) }
end
256
+
257
+ private
258
+
259
# Reads one character from the input stream.
def read_char
  next_char = @stream.read_char
  next_char
end
263
+
264
# Reads one line from the input stream.
def read_line
  next_line = @stream.read_line
  next_line
end
268
+
269
# Runs an AccentParser on the shared stream up to ACCENT_END and returns
# whatever its #process returns.
def read_accent
  parser = Aozora2Html::AccentParser.new(@stream, ACCENT_END, @chuuki_table, @images, gaiji_dir: @gaiji_dir)
  parser.process
end
272
+
273
# Runs a TagParser on the shared stream up to +endchar+ and returns whatever
# its #process returns.
def read_to_nest(endchar)
  parser = Aozora2Html::TagParser.new(@stream, endchar, @chuuki_table, @images, gaiji_dir: @gaiji_dir)
  parser.process
end
276
+
277
# Emits the closing part of the document: hyoki, the dynamic contents block,
# then the closing body/html tags.
def finalize
  hyoki
  dynamic_contents
  closing_markup = "</body>\r\n</html>\r\n"
  @out.print(closing_markup)
end
282
+
283
# Prints the DYNAMIC_CONTENTS markup to the output.
def dynamic_contents
  @out.print(DYNAMIC_CONTENTS)
end
286
+
287
# Closes the input stream first, then the output.
def close
  [@stream, @out].each(&:close)
end
291
+
292
# Converts a notation symbol into its label string.
# When the symbol is not a key of INDENT_TYPE the argument is returned as is.
def convert_indent_type(type)
  INDENT_TYPE.fetch(type, type)
end
297
+
298
# Compares +type+ with the innermost open indent.
#
# A String on top of @indent_stack is treated as :jisage, and in that case
# @noprint is set to true.
#
# @return [nil, Object] nil when the innermost indent equals +type+,
#   otherwise convert_indent_type(type)
def check_close_match(type)
  innermost = @indent_stack.last
  if innermost.is_a?(String)
    @noprint = true
    innermost = :jisage
  end
  innermost == type ? nil : convert_indent_type(type)
end
311
+
312
# Closes the innermost open indent implicitly when check_close_match(type)
# returns nil; nothing happens when no indent is open or when
# check_close_match returns a value (nested multiline tags: go ahead).
#
# @return [Object, nil] the result of push_chars for the popped closing tag,
#   nil in every other case (callers test this value)
def implicit_close(type)
  return unless @indent_stack.last
  # ok, nested multiline tags, go ahead
  return if check_close_match(type)

  # not nested, please close
  @indent_stack.pop
  closing_tag = @tag_stack.pop
  push_chars(closing_tag) if closing_tag
end
326
+
327
# Checks whether the body may end here; raises when an indent is still open.
#
# @raise [Aozora2Html::Error] when @indent_stack is not empty
def ensure_close
  still_open = @indent_stack.last
  raise Aozora2Html::Error, I18n.t(:terminate_in_style, convert_indent_type(still_open)) if still_open
end
334
+
335
# Handles an explicit closing command for +type+.
#
# @raise [Aozora2Html::Error] when check_close_match reports a mismatch
# @return [Object, nil] the result of push_chars for the popped closing tag,
#   or nil when the tag stack yields nothing
def explicit_close(type)
  mismatch = check_close_match(type)
  raise Aozora2Html::Error, I18n.t(:invalid_closing, mismatch, mismatch) if mismatch

  closing_tag = @tag_stack.pop
  push_chars(closing_tag) if closing_tag
end
346
+
347
# Main loop: repeatedly dispatches to the handler of the current section.
# The loop has no exit of its own; it ends only through an exception or a
# throw from one of the handlers.
#
# @raise [Aozora2Html::Error] when @section holds an unknown value
def parse
  handlers = {
    head: :parse_header,
    head_end: :judge_chuuki,
    chuuki: :parse_chuuki,
    chuuki_in: :parse_chuuki,
    body: :parse_body,
    tail: :parse_tail
  }
  loop do
    handler = handlers[@section]
    raise Aozora2Html::Error, 'encount undefined condition' unless handler

    send(handler)
  end
end
366
+
367
# Decides whether a note block follows the header.
#
# Peeks past a run of '-' characters. If the run is followed by a line break
# the next section is :chuuki (or :body when the run is empty); any other
# character means :body and a "<br />" line is printed.
def judge_chuuki
  dashes = 0
  peeked = @stream.peek_char(dashes)
  while '-' == peeked
    dashes += 1
    peeked = @stream.peek_char(dashes)
  end

  if "\r\n" == peeked
    @section = dashes == 0 ? :body : :chuuki
  else
    @section = :body
    @out.print("<br />\r\n")
  end
  nil
end
388
+
389
# The header is read one line at a time.
#
# An empty line ends the header: the section becomes :head_end and the
# collected header is printed as HTML. Any other line is stored in @header
# after RUBY_PREFIX and PAT_RUBY matches have been removed from it.
def parse_header
  line = read_line
  # refine from Tomita 09/06/14
  if line != ''
    line.gsub!(RUBY_PREFIX, '')
    line.gsub!(PAT_RUBY, '')
    @header.push(line)
  else
    # an empty line marks the end of the header
    @section = :head_end
    @out.print @header.to_html
  end
end
402
+
403
# Reads one line of the note block. Only a line consisting of '-' characters
# changes state: :chuuki becomes :chuuki_in, and :chuuki_in becomes :body.
def parse_chuuki
  return unless read_line.match?(/^-+$/)

  following = { chuuki: :chuuki_in, chuuki_in: :body }[@section]
  @section = following if following
end
414
+
415
# Body parser.
#
# Reads a single character, dispatches on it and stores the result in
# @buffer / @ruby_buf. When a line break is reached, everything collected so
# far is written out through general_output.
def parse_body
  char = read_char
  check = true
  case char
  when ACCENT_BEGIN
    check = false
    char = read_accent
  when TEIHON_MARK[0]
    ending_check if @buffer.length == 0
  when GAIJI_MARK then char = dispatch_gaiji
  when COMMAND_BEGIN then char = dispatch_aozora_command
  when KU then assign_kunoji
  when RUBY_BEGIN_MARK then char = apply_ruby
  end

  case char
  when "\r\n" then general_output
  when RUBY_PREFIX
    @ruby_buf.dump_into(@buffer)
    @ruby_buf.protected = true
  when @endchar
    # suddenly finished the file
    puts I18n.t(:warn_unexpected_terminator, line_number)
    throw :terminate
  when nil
    # noop
  else
    Utils.illegal_char_check(char, line_number) if check
    push_chars(escape_special_chars(char))
  end
end
460
+
461
# Checks whether the body has ended.
#
# When the next two characters complete TEIHON_MARK, the parser moves to the
# footer (:tail), verifies that no indent is open and prints the opening
# markup of the bibliographical information block.
def ending_check
  if @stream.peek_char(0) == TEIHON_MARK[1] && @stream.peek_char(1) == TEIHON_MARK[2]
    @section = :tail
    ensure_close
    @out.print("</div>\r\n<div class=\"bibliographical_information\">\r\n<hr />\r\n<br />\r\n")
  end
end
472
+
473
# Pushes +obj+ into the buffers: arrays are walked recursively, strings are
# pushed character by character, anything else is pushed as a single item.
def push_chars(obj)
  if obj.is_a?(Array)
    obj.each { |element| push_chars(element) }
  elsif obj.is_a?(String)
    obj.each_char { |character| push_char(character) }
  else
    push_char(obj)
  end
end
487
+
488
# Hands one item to @ruby_buf.push_char together with the current @buffer.
def push_char(char)
  @ruby_buf.push_char(char, @buffer)
end
491
+
492
# Writes out the line that has been read.
#
# Called when the parser reads a line break. @ruby_buf is flushed into
# @buffer and @buffer is replaced by a fresh TextBuffer.
#
# @return [void]
# @raise [Aozora2Html::Error] when an inline style is still open at the line break
def general_output
  raise Aozora2Html::Error, I18n.t(:dont_crlf_in_style, @style_stack.last_command) if @style_stack.last

  # No line break when the buffer held nothing but an indent tag
  if @noprint
    @noprint = false
    return
  end

  @ruby_buf.dump_into(@buffer)
  line = @buffer
  @buffer = TextBuffer.new
  closers = []

  indent_type = line.blank_type
  terprip = line.terpri? && @terprip
  @terprip = true

  @out.print @indent_stack.last if @indent_stack.last.is_a?(String) && !indent_type

  line.each do |item|
    case item
    when Aozora2Html::Tag::OnelineIndent
      closers.unshift(item.close_tag)
    when Aozora2Html::Tag::UnEmbedGaiji
      # restore the gaiji mark that had been removed
      @out.print GAIJI_MARK unless item.escaped?
    end
    @out.print item.to_s
  end

  # Finish the line with CRLF
  closing_markup = closers.map(&:to_s).join
  if @indent_stack.last.is_a?(String)
    # hanging indent: the closers are always printed
    @out.print closing_markup
    line_end = if indent_type == :inline
                 "\r\n"
               elsif indent_type && terprip
                 "<br />\r\n"
               else
                 "</div>\r\n"
               end
    @out.print line_end
  elsif closers.empty? && terprip
    @out.print "<br />\r\n"
  else
    @out.print closing_markup
    @out.print "\r\n"
  end
end
551
+
552
# Finds the text a forward reference points back to. The result is a buffer
# of elements because ruby and styles may be stacked on the target.
#
# The search looks at the end of @ruby_buf (when it holds something) or of
# @buffer, and removes what it finds from there. When only the tail of
# +string+ matches the last element, the search continues recursively with
# the remaining head; if that fails the popped element is put back.
#
# @param string [String] the referenced text
# @return [TextBuffer, false, nil] the found elements; false (or nil when a
#   last String/ReferenceMentioned element matches neither way) when not found
def search_front_reference(string)
  return false if string.length == 0

  searching_buf = @ruby_buf.present? ? @ruby_buf.to_a : @buffer
  last_string = searching_buf.last

  ends_with = ->(text) { Regexp.new("#{Regexp.quote(text)}$") }
  # Partial match: take the last element off, look for the rest of +string+
  # in front of it, and restore the element when that search fails.
  continue_before = lambda do |inner|
    popped = searching_buf.pop
    found = search_front_reference(string.sub(ends_with.call(inner), ''))
    if found
      found.push(popped)
      found
    else
      searching_buf.push(popped)
      false
    end
  end

  case last_string
  when String
    if last_string == ''
      searching_buf.pop
      search_front_reference(string)
    elsif last_string.match?(ends_with.call(string))
      # full match: cut the referenced text off the end of the last element
      searching_buf.pop
      searching_buf.push(last_string.sub(ends_with.call(string), ''))
      TextBuffer.new([string])
    elsif string.match?(ends_with.call(last_string))
      continue_before.call(last_string)
    end
  when Aozora2Html::Tag::ReferenceMentioned
    inner = last_string.target_string
    if inner == string
      # full match
      searching_buf.pop
      TextBuffer.new([last_string])
    elsif string.match?(ends_with.call(inner))
      continue_before.call(inner)
    end
  else
    false
  end
end
615
+
616
# Puts a previously found forward reference back.
#
# Each element goes to @ruby_buf when @ruby_buf holds something, or when the
# last element of @buffer is not a String. Otherwise a String element is
# concatenated onto the last string of @buffer and any other element is
# pushed onto @buffer.
#
# @return [void]
def recovery_front_reference(reference)
  reference.each do |element|
    if @ruby_buf.present? || !@buffer.last.is_a?(String)
      @ruby_buf.push(element)
    elsif element.is_a?(String)
      @buffer.last.concat(element)
    else
      @buffer.push(element)
    end
  end
end
639
+
640
# Records a gaiji that cannot be embedded and returns an UnEmbedGaiji tag.
#
# The two PAT_GAIJI captures are stored in @images: the second capture is
# appended to an existing entry for the first capture, or a new pair is added.
def escape_gaiji(command)
  _whole, kanji, line = command.match(PAT_GAIJI).to_a
  entry = @images.assoc(kanji)
  entry ? entry.push(line) : @images.push([kanji, line])
  Aozora2Html::Tag::UnEmbedGaiji.new(self, command)
end
650
+
651
# Handles the text following a gaiji mark.
#
# @return [String, Object] GAIJI_MARK itself when no command follows,
#   otherwise an embedded or un-embedded gaiji tag
def dispatch_gaiji
  # unless the gaiji mark is followed by COMMAND_BEGIN it is not a gaiji
  return GAIJI_MARK if @stream.peek_char(0) != COMMAND_BEGIN

  # discard the COMMAND_BEGIN character
  read_char
  # embed?
  command, _raw = read_to_nest(COMMAND_END)
  embedded = kuten2png(command)
  return embedded if embedded != command

  codepoint = command.match(/U\+([0-9A-F]{4,5})/)
  # Unemb
  return escape_gaiji(command) unless codepoint && Aozora2Html::Tag::EmbedGaiji.use_unicode

  Aozora2Html::Tag::EmbedGaiji.new(self, nil, nil, command, codepoint[1], gaiji_dir: @gaiji_dir)
end
675
+
676
# Dispatches on the kind of annotation command.
#
# The checks run strictly in the order written below; the first one that
# applies decides the handler.
#
# @return [Object, nil] COMMAND_BEGIN when no annotation follows, otherwise
#   the handler's result (nil for inline start/end commands)
def dispatch_aozora_command
  # unless COMMAND_BEGIN is followed by IGETA_MARK it is not an annotation
  return COMMAND_BEGIN if @stream.peek_char(0) != IGETA_MARK

  # discard the IGETA_MARK character
  read_char
  command, raw = read_to_nest(COMMAND_END)

  # NOTE: the original author was unsure whether this order is right
  # (risk of a wrong handler firing); keep it unchanged.
  return apply_burasage(command) if command.match?(ORIKAESHI_COMMAND)
  return exec_block_start_command(command) if command.start_with?(OPEN_MARK)
  return exec_block_end_command(command) if command.start_with?(CLOSE_MARK)
  return apply_warichu(command) if command.match?(WARICHU_COMMAND)
  return apply_jisage(command) if command.match?(JISAGE_COMMAND)
  return exec_img_command(command, raw) if command.match?(/fig(\d)+_(\d)+\.png/)
  # avoid to try complex ruby -- escape to notes
  return apply_rest_notes(command) if command.match?(PAT_REST_NOTES)

  if command.end_with?(END_MARK)
    exec_inline_end_command(command)
    return nil
  end

  return exec_frontref_command(command) if command.match?(PAT_REF)
  return apply_dakuten_katakana(command) if command.match?(/1-7-8[2345]/)
  return Aozora2Html::Tag::Kaeriten.new(self, command) if command.match?(PAT_KAERITEN)

  if command.match?(PAT_OKURIGANA)
    return Aozora2Html::Tag::Okurigana.new(self, command.gsub!(PAT_REMOVE_OKURIGANA, ''))
  end

  return apply_chitsuki(command) if command.match?(PAT_CHITSUKI)
  return nil if exec_inline_start_command(command)

  apply_rest_notes(command)
end
723
+
724
# Opens a hanging-indent ("burasage") block.
#
# The div tag string itself is pushed onto @indent_stack and a dummy empty
# string onto @tag_stack.
#
# @return [nil]
def apply_burasage(command)
  if implicit_close(:jisage)
    @terprip = false
    general_output
  end
  @noprint = true # always no print
  command = Utils.convert_japanese_number(command)

  div = if command.match?(TENTSUKI_COMMAND)
          width = command.match(PAT_ORIKAESHI_JISAGE)[1]
          "<div class=\"burasage\" style=\"margin-left: #{width}em; text-indent: -#{width}em;\">"
        else
          left, indent = command.match(PAT_ORIKAESHI_JISAGE2).to_a[1, 2]
          left = left.to_i - indent.to_i
          "<div class=\"burasage\" style=\"margin-left: #{indent}em; text-indent: #{left}em;\">"
        end

  @indent_stack.push(div)
  @tag_stack.push('') # dummy
  nil
end
745
+
746
# Returns the digits that directly precede JISAGE_COMMAND in +command+, after
# Utils.convert_japanese_number has been applied to it.
def jisage_width(command)
  normalized = Utils.convert_japanese_number(command)
  normalized.match(/(\d*)(?:#{JISAGE_COMMAND})/o)[1]
end
749
+
750
# Handles an indent ("jisage") command.
#
# * a command containing MADE_MARK or END_MARK closes the open indent;
# * a command standing alone on its line (empty @buffer, line break next)
#   and not containing ONELINE_COMMAND opens a multi-line indent;
# * everything else indents only the current line.
#
# @return [Aozora2Html::Tag::MultilineJisage, nil]
def apply_jisage(command)
  if command.match?(MADE_MARK) || command.match?(END_MARK)
    # end of indent
    explicit_close(:jisage)
    @indent_stack.pop
    return nil
  end

  command_only = !command.match?(ONELINE_COMMAND) &&
                 (@buffer.length == 0) && (@stream.peek_char(0) == "\r\n")
  unless command_only
    # this line only
    @buffer.unshift(Aozora2Html::Tag::OnelineJisage.new(self, jisage_width(command)))
    return nil
  end

  @terprip = false
  implicit_close(:jisage)
  # adhook hack
  @noprint = false
  @indent_stack.push(:jisage)
  Aozora2Html::Tag::MultilineJisage.new(self, jisage_width(command))
end
773
+
774
# Opens or closes a warichu span.
#
# On close, PAREN_END_MARK is added unless it is the next character of the
# stream. On open, PAREN_BEGIN_MARK is added after the span tag unless the
# last item of @ruby_buf already ends with it.
#
# @return [nil]
def apply_warichu(command)
  if command.match?(END_MARK)
    push_char(PAREN_END_MARK) if @stream.peek_char(0) != PAREN_END_MARK
    push_char('</span>')
  else
    # The check has to be evaluated before anything is pushed.
    previous = @ruby_buf.last
    paren_present = previous.is_a?(String) && previous.end_with?(PAREN_BEGIN_MARK)
    push_char('<span class="warichu">')
    push_char(PAREN_BEGIN_MARK) unless paren_present
  end
  nil
end
795
+
796
# Returns the first PAT_JI_LEN capture of the command (after
# Utils.convert_japanese_number), or '0' when the pattern does not match.
#
# @return [String]
def chitsuki_length(command)
  matched = Utils.convert_japanese_number(command).match(PAT_JI_LEN)
  matched ? matched[1] : '0'
end
805
+
806
# Handles a chitsuki / jiage command.
#
# @param string [String] the command
# @param multiline [Boolean] true for the multi-line form
# @return [Object, nil] nil for a closing command, otherwise a
#   MultilineChitsuki or OnelineChitsuki tag
def apply_chitsuki(string, multiline: false)
  if string.match?(CLOSE_MARK + INDENT_TYPE[:chitsuki] + END_MARK) ||
     string.match?(CLOSE_MARK + JIAGE_COMMAND + END_MARK)
    explicit_close(:chitsuki)
    @indent_stack.pop
    return nil
  end

  len = chitsuki_length(string)
  # single line only
  return Aozora2Html::Tag::OnelineChitsuki.new(self, len) unless multiline

  # multi-line form
  implicit_close(:chitsuki)
  @indent_stack.push(:chitsuki)
  Aozora2Html::Tag::MultilineChitsuki.new(self, len)
end
825
+
826
# Opens a multi-line heading. The kind is :dogyo or :mado when the command
# contains the respective mark; otherwise it is :normal and @terprip is
# cleared.
def apply_midashi(command)
  @indent_stack.push(:midashi)
  midashi_type = if command.match?(DOGYO_MARK)
                   :dogyo
                 elsif command.match?(MADO_MARK)
                   :mado
                 else
                   @terprip = false
                   :normal
                 end
  Aozora2Html::Tag::MultilineMidashi.new(self, command, midashi_type)
end
838
+
839
# Registers :yokogumi on the indent stack and returns a MultilineYokogumi tag.
# The command text itself is not used.
def apply_yokogumi(_command)
  @indent_stack << :yokogumi
  Aozora2Html::Tag::MultilineYokogumi.new(self)
end
843
+
844
# Registers :keigakomi on the indent stack and returns a Keigakomi tag.
# The command text itself is not used.
def apply_keigakomi(_command)
  @indent_stack << :keigakomi
  Aozora2Html::Tag::Keigakomi.new(self)
end
848
+
849
# Registers :caption on the indent stack and returns a MultilineCaption tag.
# The command text itself is not used.
def apply_caption(_command)
  @indent_stack << :caption
  Aozora2Html::Tag::MultilineCaption.new(self)
end
853
+
854
# Opens a jizume block. The width is the run of digits that directly
# precedes the jizume label in the command (after
# Utils.convert_japanese_number).
def apply_jizume(command)
  normalized = Utils.convert_japanese_number(command)
  width = normalized.match(/(\d*)(?:#{INDENT_TYPE[:jizume]})/)[1]
  @indent_stack.push(:jizume)
  Aozora2Html::Tag::Jizume.new(self, width)
end
859
+
860
# Pushes a block tag into the buffers and appends its closing tag to +closing+.
#
# @param tag [#close_tag] the tag to push
# @param closing [#concat] accumulator that receives tag.close_tag (mutated in place)
def push_block_tag(tag, closing)
  push_char(tag)
  closer = tag.close_tag
  closing.concat(closer)
end
864
+
865
# Returns :sho when the style text contains the "small" character, :dai otherwise.
def detect_style_size(style)
  style.match?('小'.to_sjis) ? :sho : :dai
end
872
+
873
# Opens an inline style for +command+.
#
# The command and its closing tag are recorded on @style_stack and the
# opening tag is pushed into the buffers. Exact command strings are tried
# first (plain spans, then headings), then the PAT_CHARSIZE pattern, and
# finally the decoration table COMMAND_TABLE.
#
# @param command [String]
# @return [Object, nil] the result of push_char for the opening tag, or nil
#   when the command is unknown (callers test this value)
def exec_inline_start_command(command)
  # command => [closing tag, opening tag]
  spans = {
    CHUUKI_COMMAND => ['</ruby>', '<ruby><rb>'],
    TCY_COMMAND => ['</span>', '<span dir="ltr">'],
    KEIGAKOMI_COMMAND => ['</span>', '<span class="keigakomi">'],
    YOKOGUMI_COMMAND => ['</span>', '<span class="yokogumi">'],
    CAPTION_COMMAND => ['</span>', '<span class="caption">'],
    WARIGAKI_COMMAND => ['</span>', '<span class="warigaki">']
  }
  # command => [element, css class, id increment, clears @terprip?]
  headings = {
    OMIDASHI_COMMAND => ['h3', 'o-midashi', 100, true],
    NAKAMIDASHI_COMMAND => ['h4', 'naka-midashi', 10, true],
    KOMIDASHI_COMMAND => ['h5', 'ko-midashi', 1, true],
    DOGYO_OMIDASHI_COMMAND => ['h3', 'dogyo-o-midashi', 100, false],
    DOGYO_NAKAMIDASHI_COMMAND => ['h4', 'dogyo-naka-midashi', 10, false],
    DOGYO_KOMIDASHI_COMMAND => ['h5', 'dogyo-ko-midashi', 1, false],
    MADO_OMIDASHI_COMMAND => ['h3', 'mado-o-midashi', 100, false],
    MADO_NAKAMIDASHI_COMMAND => ['h4', 'mado-naka-midashi', 10, false],
    MADO_KOMIDASHI_COMMAND => ['h5', 'mado-ko-midashi', 1, false]
  }

  if spans.key?(command)
    closer, opener = spans[command]
    @style_stack.push([command, closer])
    push_char(opener)
  elsif headings.key?(command)
    element, css_class, step, clears_terprip = headings[command]
    @style_stack.push([command, "</a></#{element}>"])
    @terprip = false if clears_terprip
    push_char("<#{element} class=\"#{css_class}\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(step)}\">")
  elsif command.match?(PAT_CHARSIZE)
    @style_stack.push([command, '</span>'])
    _whole, nest, style = command.match(PAT_CHARSIZE).to_a
    times = Utils.convert_japanese_number(nest).to_i
    daisho = detect_style_size(style)
    html_class = daisho.to_s + times.to_s
    size = Utils.create_font_size(times, daisho)
    push_char("<span class=\"#{html_class}\" style=\"font-size: #{size};\">")
  else
    ## Decoration ##
    key = command
    filter = ->(x) { x }
    direction = command.match(PAT_DIRECTION)
    if direction
      dir = direction[1]
      # renew command
      key = direction[2]
      if command.match?(TEN_MARK)
        filter = ->(x) { "#{x}_after" } if [LEFT_MARK, UNDER_MARK].include?(dir)
      elsif command.match?(SEN_MARK)
        filter = ->(x) { x.sub('under', 'over') } if [LEFT_MARK, OVER_MARK].include?(dir)
      end
    end

    # found = [class, tag]
    found = COMMAND_TABLE[key]
    if found
      @style_stack.push([command, "</#{found[1]}>"])
      push_char("<#{found[1]} class=\"#{filter.call(found[0])}\">")
    else
      puts I18n.t(:warn_undefined_command, line_number, key) if $DEBUG
      nil
    end
  end
end
965
+
966
# Handles the closing half of an inline command.
#
# The end marker is stripped from +command+ and the remainder decides what happens:
# * MAIN_MARK: switches to the tail section, closes open tags and opens the
#   "after_text" div on @out.
# * an inline-ruby note whose opener is on top of the style stack: pops it and
#   pushes the closing ruby markup built from the PAT_INLINE_RUBY capture.
# * anything matching the command on top of the style stack: pops it and pushes
#   the stored closing tag.
#
# @param command [String] the closing command text
# @raise [Aozora2Html::Error] when the command does not match the innermost open style
def exec_inline_end_command(command)
  closing = command.sub(END_MARK, '')

  if closing == MAIN_MARK
    # force to finish main_text
    @section = :tail
    ensure_close
    @noprint = true
    return @out.print("</div>\r\n<div class=\"after_text\">\r\n<hr />\r\n")
  end

  if closing.match?(CHUUKI_COMMAND) && @style_stack.last_command == CHUUKI_COMMAND
    # special inline ruby
    @style_stack.pop
    ruby_text = closing.match(PAT_INLINE_RUBY).to_a[1]
    return push_char('</rb><rp>(</rp><rt>'.to_sjis + ruby_text + '</rt><rp>)</rp></ruby>'.to_sjis)
  end

  unless @style_stack.last_command.match?(closing)
    raise Aozora2Html::Error, I18n.t(:invalid_nesting, closing, @style_stack.last_command)
  end

  push_char(@style_stack.pop[1])
end
985
+
986
# Handles the opening half of a block command.
#
# The open marker is stripped from +command+ in place, then the command is
# checked against each block type in a fixed order. Every type that matches
# pushes a block tag, which is collected through +match_buf+. From jizume
# onwards, a type that matches after something has already been collected first
# pops the top of @indent_stack.
#
# @param command [String] the block command text (mutated: the open marker is removed)
# @return [nil] when at least one block type matched (the collected text is
#   pushed onto @tag_stack)
# @return [Object] the result of apply_rest_notes for the untouched command otherwise
def exec_block_start_command(command)
  original_command = command.dup
  command.sub!(/^#{OPEN_MARK}/o, '')
  match_buf = +''
  # Once something has been collected, the next block type replaces the
  # indent entry on top of the stack.
  replace_indent = -> { @indent_stack.pop unless match_buf.empty? }

  if command.match?(INDENT_TYPE[:jisage])
    push_block_tag(apply_jisage(command), match_buf)
  elsif command.match?(/(#{INDENT_TYPE[:chitsuki]}|#{JIAGE_COMMAND})$/)
    push_block_tag(apply_chitsuki(command, multiline: true), match_buf)
  end

  push_block_tag(apply_midashi(command), match_buf) if command.match?(INDENT_TYPE[:midashi])

  # Block types handled by a dedicated apply_* method, in the order they are checked.
  {
    jizume: :apply_jizume,
    yokogumi: :apply_yokogumi,
    keigakomi: :apply_keigakomi,
    caption: :apply_caption
  }.each do |type, applier|
    next unless command.match?(INDENT_TYPE[type])

    replace_indent.call
    push_block_tag(send(applier, command), match_buf)
  end

  # Plain multi-line styles: the tag is built here and the type is recorded on the indent stack.
  { futoji: 'futoji', shatai: 'shatai' }.each do |type, style_name|
    next unless command.match?(INDENT_TYPE[type])

    replace_indent.call
    push_block_tag(Aozora2Html::Tag::MultilineStyle.new(self, style_name), match_buf)
    @indent_stack.push(type)
  end

  size_spec = command.match(PAT_CHARSIZE)
  if size_spec
    _whole, nest, style = size_spec.to_a
    replace_indent.call
    daisho = detect_style_size(style)
    font_size = Aozora2Html::Tag::FontSize.new(self, Utils.convert_japanese_number(nest).to_i, daisho)
    push_block_tag(font_size, match_buf)
    @indent_stack.push(daisho)
  end

  return apply_rest_notes(original_command) if match_buf.empty?

  @tag_stack.push(match_buf)
  nil
end
1063
+
1064
# Handles the closing half of a block command.
#
# The close marker is stripped from +command+ in place. When the remainder maps
# to a known mode, that mode is closed explicitly and the top of @indent_stack
# is popped. @terprip is cleared unless the popped entry is a String.
#
# @param command [String] the block command text (mutated: the close marker is removed)
# @return [nil] when an indent entry was popped
# @return [Object] the result of apply_rest_notes for the untouched command otherwise
def exec_block_end_command(command)
  untouched = command.dup
  command.sub!(/^#{CLOSE_MARK}/o, '')

  mode = detect_command_mode(command)
  popped = if mode
             explicit_close(mode)
             @indent_stack.pop
           end
  return apply_rest_notes(untouched) unless popped

  @terprip = false unless popped.is_a?(String)
  nil
end
1083
+
1084
# Builds an image tag from an image command.
#
# +raw+ is matched against PAT_IMAGE; its captures supply the alt text, the
# source path and the optional width/height. The CSS class is 'photo' when the
# alt text matches PHOTO_COMMAND and 'illustration' otherwise.
#
# @param command [String] the command text, used only for the fallback note
# @param raw [String] the raw command text that is matched against PAT_IMAGE
# @return [Aozora2Html::Tag::Img] when +raw+ matches PAT_IMAGE
# @return [Object] the result of apply_rest_notes(command) otherwise
def exec_img_command(command, raw)
  image = raw.match(PAT_IMAGE)
  return apply_rest_notes(command) unless image

  alt, src, _size, width, height = image.captures
  css_class = alt.match?(PHOTO_COMMAND) ? 'photo' : 'illustration'
  Aozora2Html::Tag::Img.new(self, src, css_class, alt, width, height)
end
1098
+
1099
# Applies a style command to text that appeared earlier (a front reference).
#
# PAT_FRONTREF splits +command+ into the referenced text and one or two
# specifier parts; the two parts are concatenated when the first is present.
# When the referenced text is found, exec_style is tried on it. If that yields
# nothing, the reference is restored via recovery_front_reference and the
# command falls back to an editor note.
#
# @param command [String] the front-reference command text
# @return [Object] the styled result, or the result of apply_rest_notes(command)
def exec_frontref_command(command)
  _whole, reference, spec1, spec2 = command.match(PAT_FRONTREF).to_a
  spec = spec1 ? spec1 + spec2 : spec2

  found = search_front_reference(reference) if reference
  if found
    styled = exec_style(found, spec)
    return styled if styled

    recovery_front_reference(found)
  end

  # comment out?
  apply_rest_notes(command)
end
1118
+
1119
+ # 傍記を並べる用
1120
+ #
1121
# Repeats a side mark so that it can be laid out next to several characters.
#
# @param bouki [String] the mark to repeat
# @param times [Integer] how many copies to produce
# @return [String] +times+ copies of +bouki+ joined with '&nbsp;'
def multiply(bouki, times)
  Array.new(times, bouki).join('&nbsp;')
end
1125
+
1126
+ # rubyタグの再生成(本体はrearrange_ruby)
1127
+ #
1128
+ # complex ruby wrap up utilities -- don't erase! we will use soon ...
1129
+ #
1130
# Delegates to Aozora2Html::Tag::Ruby.rearrange_ruby, passing this parser along.
#
# @param targets [Object] the elements the ruby text is attached to
# @param upper_ruby [String] ruby text for the upper side
# @param under_ruby [String] ruby text for the under side
# @return [Object] whatever Aozora2Html::Tag::Ruby.rearrange_ruby returns
def rearrange_ruby_tag(targets, upper_ruby, under_ruby)
  ruby_tag = Aozora2Html::Tag::Ruby
  ruby_tag.rearrange_ruby(self, targets, upper_ruby, under_ruby)
end
1133
+
1134
# Turns a style command into the tag object that wraps +targets+.
#
# The command is tested against each known command pattern in a fixed order and
# the first match wins. Commands that match none of the dedicated patterns are
# looked up in COMMAND_TABLE (after an optional direction prefix is split off)
# and become a Decorate tag.
#
# @param targets [Object] the already-parsed elements the style applies to
# @param command [String] the style command text
# @return [Object, nil] the resulting tag (or the kuten2png result when it
#   differs from the command); nil when the command is not recognised
# @raise [Aozora2Html::Error] when an under-ruby is added to a ruby tag that already has one
def exec_style(targets, command)
  kuten_image = kuten2png(command)
  return kuten_image if kuten_image != command

  return Aozora2Html::Tag::Dir.new(self, targets) if command.match?(TCY_COMMAND)
  return Aozora2Html::Tag::InlineYokogumi.new(self, targets) if command.match?(YOKOGUMI_COMMAND)
  return Aozora2Html::Tag::InlineKeigakomi.new(self, targets) if command.match?(KEIGAKOMI_COMMAND)
  return Aozora2Html::Tag::InlineCaption.new(self, targets) if command.match?(CAPTION_COMMAND)
  return Aozora2Html::Tag::Kaeriten.new(self, targets) if command.match?(KAERITEN_COMMAND)
  return Aozora2Html::Tag::Okurigana.new(self, targets) if command.match?(KUNTEN_OKURIGANA_COMMAND)

  if command.match?(MIDASHI_COMMAND)
    midashi_type = if command.match?(DOGYO_MARK)
                     :dogyo
                   elsif command.match?(MADO_MARK)
                     :mado
                   else
                     :normal
                   end
    # Only a normal heading clears @terprip.
    @terprip = false if midashi_type == :normal
    return Aozora2Html::Tag::Midashi.new(self, targets, command, midashi_type)
  end

  if command.match?(PAT_CHARSIZE)
    _whole, nest, style = command.match(PAT_CHARSIZE).to_a
    times = Utils.convert_japanese_number(nest).to_i
    return Aozora2Html::Tag::InlineFontSize.new(self, targets, times, detect_style_size(style))
  end

  if command.match?(PAT_RUBY_DIR)
    _whole, _dir, under = command.match(PAT_RUBY_DIR).to_a
    single_ruby = (targets.length == 1) && targets[0].is_a?(Aozora2Html::Tag::Ruby)
    return rearrange_ruby_tag(targets, '', under) unless single_ruby

    # A lone ruby tag receives the under-ruby in place, provided it has none yet.
    tag = targets[0]
    raise Aozora2Html::Error, I18n.t(:dont_allow_triple_ruby) unless tag.under_ruby == ''

    tag.under_ruby = under
    return tag
  end

  return rearrange_ruby_tag(targets, PAT_CHUUKI.match(command).to_a[1], '') if command.match?(PAT_CHUUKI)

  if command.match?(PAT_BOUKI)
    bouki = PAT_BOUKI.match(command).to_a[1]
    return rearrange_ruby_tag(targets, multiply(bouki, targets.to_s.length), '')
  end

  ## direction fix! ##
  key = command
  css_filter = ->(name) { name }
  if command.match?(PAT_DIRECTION)
    # The command part after the direction prefix becomes the lookup key.
    _whole, dir, key = command.match(PAT_DIRECTION).to_a
    if key.match?(TEN_MARK)
      case dir
      when LEFT_MARK, UNDER_MARK
        css_filter = ->(name) { "#{name}_after" }
      end
    elsif key.match?(SEN_MARK)
      case dir
      when LEFT_MARK, OVER_MARK
        css_filter = ->(name) { name.sub('under', 'over') }
      end
    end
  end

  # entry = [class, tag]
  entry = COMMAND_TABLE[key]
  return unless entry

  Aozora2Html::Tag::Decorate.new(self, targets, css_filter.call(entry[0]), entry[1])
end
1207
+
1208
# Builds a dakuten-katakana tag for a gaiji command containing the code 1-7-82 .. 1-7-85.
#
# The last digit of the code selects an entry of DAKUTEN_KATAKANA_TABLE; that
# entry is then searched for as a front reference.
#
# @param command [String] the command text
# @return [Aozora2Html::Tag::DakutenKatakana] when the front reference is found
# @return [Object] the result of apply_rest_notes(command) otherwise
def apply_dakuten_katakana(command)
  n = command[/1-7-8([2345])/, 1]
  found = search_front_reference(DAKUTEN_KATAKANA_TABLE[n])
  return apply_rest_notes(command) unless found

  Aozora2Html::Tag::DakutenKatakana.new(self, n, found.join, gaiji_dir: @gaiji_dir)
end
1218
+
1219
+ # くの字点の処理
1220
+ #
1221
+ # くの字点は現状そのまま出力するのでフッタの「表記について」で出力するかどうかのフラグ処理だけ行う
1222
# Records in @chuuki_table which kind of "kunoji" repeat mark follows in the stream.
#
# Nothing is consumed: the stream is only peeked. :kunoji is flagged when the
# next character is NOJI, :dakutenkunoji when the next two are DAKUTEN and NOJI.
#
# @return [true, nil] true when a flag was set, nil otherwise
def assign_kunoji
  case @stream.peek_char(0)
  when NOJI
    @chuuki_table[:kunoji] = true
  when DAKUTEN
    @chuuki_table[:dakutenkunoji] = true if @stream.peek_char(1) == NOJI
  end
end
1233
+
1234
# Wraps a command in an editor note.
#
# Also sets the :chuki flag in @chuuki_table.
#
# @param command [String] the command text to keep as a note
# @return [Aozora2Html::Tag::EditorNote]
def apply_rest_notes(command)
  @chuuki_table[:chuki] = true
  note = Aozora2Html::Tag::EditorNote.new(self, command)
  note
end
1238
+
1239
+ # |が来たときは文字種を無視してruby_bufを守らなきゃいけない
1240
# Reads ruby text up to RUBY_END_MARK and appends the resulting ruby markup to @buffer.
#
# The protection flag of @ruby_buf is cleared first. An empty ruby text is
# treated as an escaped ruby character.
#
# @return [String] RUBY_BEGIN_MARK + RUBY_END_MARK when the ruby text is empty
# @return [nil] when ruby markup was appended to @buffer
def apply_ruby
  @ruby_buf.protected = nil
  reading, = read_to_nest(RUBY_END_MARK)
  # escaped ruby character
  return RUBY_BEGIN_MARK + RUBY_END_MARK if reading.length.zero?

  @buffer.concat(@ruby_buf.create_ruby(self, reading))
  nil
end
1252
+
1253
+ # parse_bodyのフッタ版
1254
# Reads one character of the tail section and processes it.
#
# The first dispatch replaces the character with the result of the matching
# handler (accent, gaiji, command, ruby) or terminates parsing at the end
# character. The second dispatch decides what to do with the result: flush the
# line, start a protected ruby base, do nothing, or escape the text and push it
# into the buffer.
def parse_tail
  token = read_char
  needs_check = true
  case token
  when ACCENT_BEGIN
    # Accent results skip the illegal-character check below.
    needs_check = false
    token = read_accent
  when @endchar then throw :terminate
  when GAIJI_MARK then token = dispatch_gaiji
  when COMMAND_BEGIN then token = dispatch_aozora_command
  when KU then assign_kunoji
  when RUBY_BEGIN_MARK then token = apply_ruby
  end

  case token
  when "\r\n" then tail_output
  when RUBY_PREFIX
    @ruby_buf.dump_into(@buffer)
    @ruby_buf.protected = true
  when nil
    # noop
  else
    Utils.illegal_char_check(token, line_number) if needs_check
    push_chars(escape_special_chars(token))
  end
end
1288
+
1289
+ # general_outputのフッタ版
1290
# Flushes the current tail line to @out.
#
# Pending ruby text is dumped into the buffer, the buffer is joined and reset,
# the contact address and the site name are turned into links, and the line is
# printed. "<br />" is appended unless the line already ends with a line-level
# tag or consists of a single tag.
def tail_output
  @ruby_buf.dump_into(@buffer)
  line = @buffer.join
  @buffer = TextBuffer.new

  line = line.gsub('info@aozora.gr.jp', '<a href="mailto: info@aozora.gr.jp">info@aozora.gr.jp</a>')
  line = line.gsub('青空文庫(http://www.aozora.gr.jp/)'.to_sjis) do |site|
    "<a href=\"http://www.aozora.gr.jp/\">#{site}</a>"
  end

  ends_with_tag = line.match?(%r{(<br />$|</p>$|</h\d>$|<div.*>$|</div>$|^<[^>]*>$)})
  @out.print line, (ends_with_tag ? "\r\n" : "<br />\r\n")
end
1302
+
1303
+ # `●表記について`で使用した注記等を出力する
1304
# Prints the "notation notes" section to @out.
#
# The section opens a "notation_notes" div and a <ul>. One <li> is printed for
# each flag set in @chuuki_table (:chuki, :kunoji, :dakutenkunoji, :newjis,
# :accent); the :newjis and :accent items are skipped when the corresponding tag
# class reports use_jisx0213. When @images is not empty, a final <li> is printed,
# the list is closed and a "gaiji_list" table with one row per @images entry
# follows; otherwise only the list is closed. The div is closed at the end.
#
# NOTE(review): the multi-line string literal below is emitted at runtime exactly
# as written, so its whitespace must not be reformatted.
+ def hyoki
1304
+ # <br /> times fix
1305
+ @out.print "<br />\r\n</div>\r\n<div class=\"notation_notes\">\r\n<hr />\r\n<br />\r\n●表記について<br />\r\n<ul>\r\n".to_sjis
1306
+ @out.print "\t<li>このファイルは W3C 勧告 XHTML1.1 にそった形式で作成されています。</li>\r\n".to_sjis
1307
+ if @chuuki_table[:chuki]
1308
+ @out.print "\t<li>[#…]は、入力者による注を表す記号です。</li>\r\n".to_sjis
1309
+ end
1310
+ if @chuuki_table[:kunoji]
1311
+ if @chuuki_table[:dakutenkunoji]
1312
+ @out.printf("\t<li>「くの字点」は「%s」で、「濁点付きくの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + NOJI, KU + DAKUTEN + NOJI)
1313
+ else
1314
+ @out.printf("\t<li>「くの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + NOJI)
1315
+ end
1316
+ elsif @chuuki_table[:dakutenkunoji]
1317
+ @out.printf("\t<li>「濁点付きくの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU + DAKUTEN + NOJI)
1318
+ end
1319
+ if @chuuki_table[:newjis] && !Aozora2Html::Tag::EmbedGaiji.use_jisx0213
1320
+ @out.print "\t<li>「くの字点」をのぞくJIS X 0213にある文字は、画像化して埋め込みました。</li>\r\n".to_sjis
1321
+ end
1322
+ if @chuuki_table[:accent] && !Aozora2Html::Tag::Accent.use_jisx0213
1323
+ @out.print "\t<li>アクセント符号付きラテン文字は、画像化して埋め込みました。</li>\r\n".to_sjis
1324
+ end
1325
+ if @images[0]
1326
+ @out.print "\t<li>この作品には、JIS X 0213にない、以下の文字が用いられています。(数字は、底本中の出現「ページ-行」数。)これらの文字は本文内では「※[#…]」の形で示しました。</li>\r\n</ul>\r\n<br />\r\n\t\t<table class=\"gaiji_list\">\r\n".to_sjis
1327
+ @images.each do |cell|
1328
+ k, *v = cell
1329
+ vs = v.join('、'.to_sjis)
1330
+ @out.print " <tr>
1331
+ <td>
1332
+ #{k}
1333
+ </td>
1334
+ <td>&nbsp;&nbsp;</td>
1335
+ <td>
1336
+ #{vs} </td>
1337
+ <!--
1338
+ <td>
1339
+ " + '  '.to_sjis + "<img src=\"../../../gaiji/others/xxxx.png\" alt=\"#{k}\" width=32 height=32 />
1340
+ </td>
1341
+ -->
1342
+ </tr>
1343
+ ".to_sjis
1344
+ end
1345
+ @out.print "\t\t</table>\r\n".to_sjis
1346
+ else
1347
+ @out.print "</ul>\r\n" # close the list here: elements other than <li> are not valid inside <ul>
1348
+ end
1349
+ @out.print "</div>\r\n"
1350
+ end
1352
+
1353
+ # The original Aozora2Html#push_chars does not convert "'" into '&#39;'; this matches the old behavior of CGI.escapeHTML().
1354
# Escapes the HTML special characters &, ", < and > in a string.
#
# The apostrophe is deliberately left as it is. Anything that is not a String
# is passed through unchanged.
#
# @param char [Object] the text (or other object) to escape
# @return [Object] the escaped copy for a String, +char+ itself otherwise
def escape_special_chars(char)
  return char unless char.is_a?(String)

  entities = { '&' => '&amp;', '"' => '&quot;', '<' => '&lt;', '>' => '&gt;' }
  char.gsub(/[&"<>]/, entities)
end
6
1361
  end