aozora2html 2.0.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rubocop.yml +19 -0
  3. data/.github/workflows/ruby.yml +4 -1
  4. data/.rubocop.yml +36 -152
  5. data/.rubocop_todo.yml +7 -0
  6. data/CHANGELOG.md +26 -0
  7. data/Gemfile +2 -0
  8. data/Guardfile +3 -1
  9. data/HACKING.md +45 -0
  10. data/README.md +14 -6
  11. data/Rakefile +12 -5
  12. data/aozora2html.gemspec +24 -22
  13. data/bin/aozora2html +21 -19
  14. data/lib/aozora2html/accent_parser.rb +62 -54
  15. data/lib/aozora2html/error.rb +5 -4
  16. data/lib/aozora2html/header.rb +20 -18
  17. data/lib/aozora2html/i18n.rb +40 -20
  18. data/lib/aozora2html/ruby_buffer.rb +63 -28
  19. data/lib/aozora2html/string_refinements.rb +36 -0
  20. data/lib/aozora2html/style_stack.rb +6 -0
  21. data/lib/aozora2html/tag/accent.rb +10 -12
  22. data/lib/aozora2html/tag/block.rb +11 -9
  23. data/lib/aozora2html/tag/chitsuki.rb +6 -2
  24. data/lib/aozora2html/tag/dakuten_katakana.rb +10 -8
  25. data/lib/aozora2html/tag/decorate.rb +4 -3
  26. data/lib/aozora2html/tag/dir.rb +4 -2
  27. data/lib/aozora2html/tag/editor_note.rb +7 -4
  28. data/lib/aozora2html/tag/embed_gaiji.rb +15 -11
  29. data/lib/aozora2html/tag/font_size.rb +5 -2
  30. data/lib/aozora2html/tag/gaiji.rb +4 -3
  31. data/lib/aozora2html/tag/img.rb +4 -4
  32. data/lib/aozora2html/tag/indent.rb +3 -3
  33. data/lib/aozora2html/tag/inline.rb +10 -7
  34. data/lib/aozora2html/tag/inline_caption.rb +4 -2
  35. data/lib/aozora2html/tag/inline_font_size.rb +4 -3
  36. data/lib/aozora2html/tag/inline_keigakomi.rb +4 -2
  37. data/lib/aozora2html/tag/inline_yokogumi.rb +4 -3
  38. data/lib/aozora2html/tag/jisage.rb +3 -1
  39. data/lib/aozora2html/tag/jizume.rb +3 -0
  40. data/lib/aozora2html/tag/kaeriten.rb +4 -2
  41. data/lib/aozora2html/tag/keigakomi.rb +15 -9
  42. data/lib/aozora2html/tag/kunten.rb +4 -4
  43. data/lib/aozora2html/tag/midashi.rb +3 -1
  44. data/lib/aozora2html/tag/multiline.rb +3 -0
  45. data/lib/aozora2html/tag/multiline_caption.rb +6 -8
  46. data/lib/aozora2html/tag/multiline_chitsuki.rb +3 -1
  47. data/lib/aozora2html/tag/multiline_jisage.rb +3 -1
  48. data/lib/aozora2html/tag/multiline_midashi.rb +6 -3
  49. data/lib/aozora2html/tag/multiline_style.rb +5 -3
  50. data/lib/aozora2html/tag/multiline_yokogumi.rb +6 -9
  51. data/lib/aozora2html/tag/okurigana.rb +4 -2
  52. data/lib/aozora2html/tag/oneline_chitsuki.rb +3 -2
  53. data/lib/aozora2html/tag/oneline_indent.rb +8 -1
  54. data/lib/aozora2html/tag/oneline_jisage.rb +3 -0
  55. data/lib/aozora2html/tag/reference_mentioned.rb +22 -21
  56. data/lib/aozora2html/tag/ruby.rb +174 -70
  57. data/lib/aozora2html/tag/un_embed_gaiji.rb +8 -2
  58. data/lib/aozora2html/tag.rb +40 -38
  59. data/lib/aozora2html/tag_parser.rb +23 -16
  60. data/lib/aozora2html/text_buffer.rb +50 -0
  61. data/lib/aozora2html/utils.rb +113 -50
  62. data/lib/aozora2html/version.rb +3 -1
  63. data/lib/aozora2html/yaml_loader.rb +8 -2
  64. data/lib/aozora2html/zip.rb +4 -0
  65. data/lib/aozora2html.rb +1358 -3
  66. data/lib/extensions.rb +2 -34
  67. data/lib/jstream.rb +96 -25
  68. data/sample/chukiichiran_kinyurei.html +15 -2
  69. data/sample/chukiichiran_kinyurei.txt +15 -2
  70. data/test/test_aozora2html.rb +137 -148
  71. data/test/test_aozora_accent_parser.rb +26 -9
  72. data/test/test_command_parse.rb +25 -22
  73. data/test/test_compat.rb +3 -4
  74. data/test/test_dakuten_katakana_tag.rb +10 -12
  75. data/test/test_decorate_tag.rb +9 -6
  76. data/test/test_dir_tag.rb +9 -6
  77. data/test/test_editor_note_tag.rb +8 -5
  78. data/test/test_exception.rb +10 -8
  79. data/test/test_font_size_tag.rb +16 -13
  80. data/test/test_gaiji_tag.rb +15 -14
  81. data/test/test_header.rb +25 -40
  82. data/test/test_helper.rb +3 -1
  83. data/test/test_i18n.rb +22 -6
  84. data/test/test_img_tag.rb +9 -5
  85. data/test/test_inline_caption_tag.rb +9 -6
  86. data/test/test_inline_font_size_tag.rb +13 -10
  87. data/test/test_inline_keigakomi_tag.rb +9 -6
  88. data/test/test_inline_yokogumi_tag.rb +9 -6
  89. data/test/test_jizume_tag.rb +9 -7
  90. data/test/test_jstream.rb +33 -30
  91. data/test/test_kaeriten_tag.rb +9 -6
  92. data/test/test_keigakomi_tag.rb +11 -9
  93. data/test/test_midashi_tag.rb +15 -14
  94. data/test/test_multiline_caption_tag.rb +7 -5
  95. data/test/test_multiline_midashi_tag.rb +24 -25
  96. data/test/test_multiline_style_tag.rb +9 -7
  97. data/test/test_multiline_yokogumi_tag.rb +7 -5
  98. data/test/test_okurigana_tag.rb +9 -6
  99. data/test/test_ruby_parse.rb +14 -14
  100. data/test/test_ruby_tag.rb +9 -6
  101. data/test/test_tag_parser.rb +28 -26
  102. metadata +60 -14
  103. data/.travis.yml +0 -12
  104. data/lib/t2hs.rb +0 -1607
data/lib/t2hs.rb DELETED
@@ -1,1607 +0,0 @@
1
- # -*- coding:utf-8 -*-
2
- # 青空文庫形式のテキストファイルを html に整形する ruby スクリプト
3
- require "extensions"
4
- require "aozora2html/error"
5
- require "jstream"
6
- require "aozora2html/tag"
7
- require "aozora2html/tag_parser"
8
- require "aozora2html/accent_parser"
9
- require "aozora2html/style_stack"
10
- require "aozora2html/header"
11
- require "aozora2html/ruby_buffer"
12
- require "aozora2html/yaml_loader"
13
- require "aozora2html/utils"
14
-
15
- $gaiji_dir = "../../../gaiji/"
16
-
17
- $css_files = Array["../../aozora.css"]
18
-
19
- # 変換器本体
20
- class Aozora2Html
21
-
22
- # 全角バックスラッシュが出せないから直打ち
23
- KU = ["18e5"].pack("h*").force_encoding("shift_jis")
24
- NOJI = ["18f5"].pack("h*").force_encoding("shift_jis")
25
- DAKUTEN = ["18d8"].pack("h*").force_encoding("shift_jis")
26
- GAIJI_MARK = "※".to_sjis
27
- IGETA_MARK = "#".to_sjis
28
- RUBY_BEGIN_MARK = "《".to_sjis
29
- RUBY_END_MARK = "》".to_sjis
30
- PAREN_BEGIN_MARK = "(".to_sjis
31
- PAREN_END_MARK = ")".to_sjis
32
- SIZE_SMALL = "小".to_sjis
33
- SIZE_MIDDLE = "中".to_sjis
34
- SIZE_LARGE = "大".to_sjis
35
- TEIHON_MARK = "底本:".to_sjis
36
- COMMAND_BEGIN = "[".to_sjis
37
- COMMAND_END = "]".to_sjis
38
- ACCENT_BEGIN = "〔".to_sjis
39
- ACCENT_END = "〕".to_sjis
40
- AOZORABUNKO = "青空文庫".to_sjis
41
- #PAT_EDITOR = /[校訂|編|編集|編集校訂|校訂編集]$/
42
- PAT_EDITOR = "(校訂|編|編集)$".to_sjis
43
- PAT_HENYAKU = "編訳$".to_sjis
44
- PAT_TRANSLATOR = "訳$".to_sjis
45
- RUBY_PREFIX = "|".to_sjis
46
- PAT_RUBY = /#{"《.*?》".to_sjis}/
47
- PAT_DIRECTION = "(右|左|上|下)に(.*)".to_sjis
48
- PAT_REF = "^「.+」".to_sjis
49
- CHUUKI_COMMAND = "注記付き".to_sjis
50
- TCY_COMMAND = "縦中横".to_sjis
51
- KEIGAKOMI_COMMAND = "罫囲み".to_sjis
52
- YOKOGUMI_COMMAND = "横組み".to_sjis
53
- CAPTION_COMMAND = "キャプション".to_sjis
54
- WARIGAKI_COMMAND = "割書".to_sjis
55
- KAERITEN_COMMAND = "返り点".to_sjis
56
- KUNTEN_OKURIGANA_COMMAND = "訓点送り仮名".to_sjis
57
- MIDASHI_COMMAND = "見出し".to_sjis
58
- OMIDASHI_COMMAND = "大見出し".to_sjis
59
- NAKAMIDASHI_COMMAND = "中見出し".to_sjis
60
- KOMIDASHI_COMMAND = "小見出し".to_sjis
61
- DOGYO_OMIDASHI_COMMAND = "同行大見出し".to_sjis
62
- DOGYO_NAKAMIDASHI_COMMAND = "同行中見出し".to_sjis
63
- DOGYO_KOMIDASHI_COMMAND = "同行小見出し".to_sjis
64
- MADO_OMIDASHI_COMMAND = "窓大見出し".to_sjis
65
- MADO_NAKAMIDASHI_COMMAND = "窓中見出し".to_sjis
66
- MADO_KOMIDASHI_COMMAND = "窓小見出し".to_sjis
67
- LEFT_MARK = "左".to_sjis
68
- UNDER_MARK = "下".to_sjis
69
- OVER_MARK = "上".to_sjis
70
- MAIN_MARK = "本文".to_sjis
71
- END_MARK = "終わり".to_sjis
72
- TEN_MARK = "点".to_sjis
73
- SEN_MARK = "線".to_sjis
74
- OPEN_MARK = "ここから".to_sjis
75
- CLOSE_MARK = "ここで".to_sjis
76
- MADE_MARK = "まで".to_sjis
77
- DOGYO_MARK = "同行".to_sjis
78
- MADO_MARK = "窓".to_sjis
79
- JIAGE_COMMAND = "字上げ".to_sjis
80
- JISAGE_COMMAND = "字下げ".to_sjis
81
- PHOTO_COMMAND = "写真".to_sjis
82
- ORIKAESHI_COMMAND = "折り返して".to_sjis
83
- ONELINE_COMMAND = "この行".to_sjis
84
- NON_0213_GAIJI = "非0213外字".to_sjis
85
- WARICHU_COMMAND = "割り注".to_sjis
86
- TENTSUKI_COMMAND = "天付き".to_sjis
87
- PAT_REST_NOTES = "(左|下)に「(.*)」の(ルビ|注記|傍記)".to_sjis
88
- PAT_KUTEN = /#{"「※」[は|の]".to_sjis}/
89
- PAT_KUTEN_DUAL = "※.*※".to_sjis
90
- PAT_GAIJI = "(?:#)(.*)(?:、)(.*)".to_sjis
91
- PAT_KAERITEN = "^([一二三四五六七八九十レ上中下甲乙丙丁天地人]+)$".to_sjis
92
- PAT_OKURIGANA = "^((.+))$".to_sjis
93
- PAT_REMOVE_OKURIGANA = /#{"[()]".to_sjis}/
94
- PAT_CHITSUKI = /#{"(地付き|字上げ)(終わり)*$".to_sjis}/
95
- PAT_ORIKAESHI_JISAGE = "折り返して(\\d*)字下げ".to_sjis
96
- PAT_ORIKAESHI_JISAGE2 = "(\\d*)字下げ、折り返して(\\d*)字下げ".to_sjis
97
- PAT_JI_LEN = "([0-9]+)字".to_sjis
98
- PAT_INLINE_RUBY = "「(.*)」の注記付き".to_sjis
99
- PAT_IMAGE = "(.*)((fig.+\\.png)(、横([0-9]+)×縦([0-9]+))*)入る".to_sjis
100
- PAT_FRONTREF = "「([^「」]*(?:「.+」)*[^「」]*)」[にはの](「.+」の)*(.+)".to_sjis
101
- PAT_RUBY_DIR = "(左|下)に「([^」]*)」の(ルビ|注記)".to_sjis
102
- PAT_CHUUKI = /#{"「(.+?)」の注記".to_sjis}/
103
- PAT_BOUKI = /#{"「(.)」の傍記".to_sjis}/
104
- PAT_CHARSIZE = /#{"(.*)段階(..)な文字".to_sjis}/
105
-
106
- DYNAMIC_CONTENTS = ("<div id=\"card\">\r\n<hr />\r\n<br />\r\n" +
107
- "<a href=\"JavaScript:goLibCard();\" id=\"goAZLibCard\">●図書カード</a>" +
108
- "<script type=\"text/javascript\" src=\"../../contents.js\"></script>\r\n" +
109
- "<script type=\"text/javascript\" src=\"../../golibcard.js\"></script>\r\n" +
110
- "</div>").to_sjis
111
-
112
- # KUNOJI = ["18e518f5"].pack("h*")
113
- # utf8 ["fecbf8fecbcb"].pack("h*")
114
- # DAKUTENKUNOJI = ["18e518d818f5"].pack("h*")
115
- # utf8 ["fecbf82e083bfecbcb"].pack("h*")
116
-
117
- loader = Aozora2Html::YamlLoader.new(File.dirname(__FILE__))
118
- ACCENT_TABLE = loader.load("../yml/accent_table.yml")
119
-
120
- # [class, tag]
121
- COMMAND_TABLE = loader.load("../yml/command_table.yml")
122
- JIS2UCS = loader.load("../yml/jis2ucs.yml")
123
-
124
- INDENT_TYPE = {
125
- :jisage => "字下げ".to_sjis,
126
- :chitsuki => "地付き".to_sjis,
127
- :midashi => "見出し".to_sjis,
128
- :jizume => "字詰め".to_sjis,
129
- :yokogumi => "横組み".to_sjis,
130
- :keigakomi => "罫囲み".to_sjis,
131
- :caption => "キャプション".to_sjis,
132
- :futoji => "太字".to_sjis,
133
- :shatai => "斜体".to_sjis,
134
- :dai => "大きな文字".to_sjis,
135
- :sho => "小さな文字".to_sjis,
136
- }
137
-
138
- DAKUTEN_KATAKANA_TABLE = {
139
- "2" => "ワ゛".to_sjis,
140
- "3" => "ヰ゛".to_sjis,
141
- "4" => "ヱ゛".to_sjis,
142
- "5" => "ヲ゛".to_sjis,
143
- }
144
-
145
# Builds a converter that reads Aozora Bunko text and writes HTML.
#
# @param input [#read, String] a readable IO, or a path that is opened as
#   binary Shift_JIS
# @param output [#print, String] a writable IO, or a path that is opened for
#   writing
def initialize(input, output)
  source = input.respond_to?(:read) ? input : File.open(input, "rb:Shift_JIS")
  @stream = Jstream.new(source)
  @out = output.respond_to?(:print) ? output : File.open(output, "w")

  @buffer = []
  @ruby_buf = RubyBuffer.new
  # Section currently being processed
  # (:head, :head_end, :chuuki, :chuuki_in, :body, :tail).
  @section = :head
  @header = Aozora2Html::Header.new # collected header lines
  @style_stack = StyleStack.new     # stack of open inline styles
  @chuuki_table = {}                # which notes to emit at the very end
  @images = []                      # gaiji images that were used
  # Usually symbols; for hanging indents the <div> tag string is stored.
  @indent_stack = []
  @tag_stack = []
  # Heading counter; the increment depends on the heading level.
  @midashi_id = 0
  @terprip = true # line-break control flag
  # Character that ends parsing (AccentParser/TagParser use another one).
  @endchar = :eof
  # When set, reaching the end of a line outputs nothing.
  @noprint = nil
end
170
-
171
# Line number currently reported by the input stream.
#
# @return [Object] whatever `@stream.line` returns
def line_number
  @stream.line
end
174
-
175
# Tells whether a block-level command may start here.
#
# Checking that no inline style is open (empty style stack) is sufficient.
#
# @return [Boolean] result of `@style_stack.empty?`
def block_allowed_context?
  @style_stack.empty?
end
179
-
180
- # 一文字読み込む
181
# Reads a single character from the input stream.
#
# @return [Object] whatever `@stream.read_char` returns
def read_char
  @stream.read_char
end
184
-
185
- # 指定された終端文字(1文字のStringかCRLF)まで読み込む
186
- #
187
- # @param [String] endchar 終端文字
188
# Reads from the stream up to (and consuming) the terminator and returns
# everything read before it.
#
# When the stream yields a Symbol before the terminator is seen, the
# terminator is printed and the Symbol is then passed to String#concat as-is.
#
# @param [String] endchar terminator (a one-character String or CRLF)
# @return [String] the characters read before the terminator
def read_to(endchar)
  collected = ""
  until (piece = @stream.read_char) == endchar
    print endchar if piece.kind_of?(Symbol)
    collected.concat(piece)
  end
  collected
end
203
-
204
# Hands the stream to an AccentParser that stops at ACCENT_END and returns
# the result of its `process` call.
def read_accent
  parser = Aozora2Html::AccentParser.new(@stream, ACCENT_END, @chuuki_table, @images)
  parser.process
end
207
-
208
# Hands the stream to a TagParser that stops at the given terminator and
# returns the result of its `process` call.
#
# @param endchar terminator passed to the TagParser
def read_to_nest(endchar)
  parser = Aozora2Html::TagParser.new(@stream, endchar, @chuuki_table, @images)
  parser.process
end
211
-
212
- # 1行読み込み
213
- #
214
- # 合わせて@bufferもクリアする
215
- # @return [String] 読み込んだ文字列を返す
216
- #
217
# Reads one line (up to CRLF) and resets @buffer afterwards.
#
# @return [String] the text that was read
def read_line
  read_to("\r\n").tap { @buffer = [] }
end
222
-
223
- # parseする
224
- #
225
- # 終了時(終端まで来た場合)にはthrow :terminateで脱出する
226
- #
227
- def process
228
- begin
229
- catch(:terminate) do
230
- loop do
231
- begin
232
- parse
233
- rescue Aozora2Html::Error => e
234
- puts e.message(line_number)
235
- if e.is_a?(Aozora2Html::Error)
236
- exit(2)
237
- end
238
- end
239
- end
240
- end
241
- tail_output # final call
242
- finalize
243
- close
244
- rescue => e
245
- puts "ERROR: line: #{line_number}"
246
- raise e
247
- end
248
- end
249
-
250
# Classifies a token by delegating to its own `char_type` method.
#
# Note that `String#char_type` is defined as well. Any StandardError raised
# by the call (for example when the token has no such method) yields :else.
#
# @param char token to classify
# @return the token's char_type, or :else on failure
def char_type(char)
  char.char_type
rescue StandardError
  :else
end
258
-
259
# Emits the end of the document: calls `hyoki`, then `dynamic_contents`,
# and finally prints the closing body/html tags.
def finalize
  hyoki
  dynamic_contents
  closing_markup = "</body>\r\n</html>\r\n"
  @out.print(closing_markup)
end
264
-
265
# Prints the DYNAMIC_CONTENTS markup to the output.
def dynamic_contents
  @out.print(DYNAMIC_CONTENTS)
end
268
-
269
# Closes the input stream first, then the output.
def close
  @stream.close
  @out.close
end
273
-
274
- # 記法のシンボル名から文字列へ変換する
275
- # シンボルが見つからなければそのまま返す
276
# Maps a notation symbol to its display string via INDENT_TYPE.
#
# @param type key to look up
# @return the INDENT_TYPE entry, or `type` itself when there is none
def convert_indent_type(type)
  INDENT_TYPE.fetch(type) { type }
end
279
-
280
# Checks whether `type` matches the innermost open block.
#
# A String on top of the indent stack is treated as :jisage and also sets
# @noprint to true.
#
# @param type [Symbol] block type being closed
# @return [nil, Object] nil on a match, otherwise
#   `convert_indent_type(type)`
def check_close_match(type)
  current = @indent_stack.last
  if current.is_a?(String)
    @noprint = true
    current = :jisage
  end
  current == type ? nil : convert_indent_type(type)
end
293
-
294
# Closes the innermost open block when a new block of the same type starts.
#
# Nothing happens when no block is open, or when `check_close_match`
# reports a mismatch (nested multiline tags are allowed to continue).
# Otherwise the indent stack is popped and the popped closing tag, if any,
# is pushed to the output buffers.
#
# @param type [Symbol] type of the block that is about to start
# @return [nil, Object] nil when nothing was pushed, otherwise the value of
#   `push_chars`
def implicit_close(type)
  return unless @indent_stack.last
  return if check_close_match(type) # nested multiline tags, go ahead

  # not nested: close the previous block
  @indent_stack.pop
  closing = @tag_stack.pop
  push_chars(closing) if closing
end
307
-
308
- # 本文が終わってよいかチェックし、終わっていなければ例外をあげる
309
# Verifies that the body may end here.
#
# @raise [Aozora2Html::Error] when a block is still open on the indent stack
# @return [nil]
def ensure_close
  open_block = @indent_stack.last
  return unless open_block

  raise Aozora2Html::Error, I18n.t(:terminate_in_style, convert_indent_type(open_block))
end
314
-
315
# Handles an explicit closing command for a block of the given type.
#
# @param type [Symbol] block type named by the closing command
# @raise [Aozora2Html::Error] when `check_close_match` reports a mismatch
# @return [nil, Object] nil when the tag stack had nothing to pop,
#   otherwise the value of `push_chars`
def explicit_close(type)
  mismatch = check_close_match(type)
  raise Aozora2Html::Error, I18n.t(:invalid_closing, mismatch, mismatch) if mismatch

  closing = @tag_stack.pop
  push_chars(closing) if closing
end
324
-
325
- # main loop
326
# Main loop step: dispatches to the parser for the current section.
#
# @raise [Aozora2Html::Error] when @section holds an unknown value
def parse
  case @section
  when :head               then parse_header
  when :head_end           then judge_chuuki
  when :chuuki, :chuuki_in then parse_chuuki
  when :body               then parse_body
  when :tail               then parse_tail
  else
    raise Aozora2Html::Error, "encount undefined condition"
  end
end
342
-
343
# Decides, by peeking ahead, whether a note (chuuki) section follows the
# header.
#
# Leading "-" characters are skipped. If a CRLF comes next, the section
# becomes :body when there were no dashes and the following character is
# also CRLF, and :chuuki otherwise. Any other character switches to :body
# and prints a "<br />" line.
#
# @return [nil]
def judge_chuuki
  offset = 0
  offset += 1 while (ahead = @stream.peek_char(offset)) == "-"

  if ahead == "\r\n"
    blank_pair = offset == 0 && @stream.peek_char(1) == "\r\n"
    @section = blank_pair ? :body : :chuuki
  else
    @section = :body
    @out.print("<br />\r\n")
  end
  nil
end
364
-
365
- # headerは一行ずつ読む
366
# Consumes one header line.
#
# An empty line ends the header: the section becomes :head_end and the
# collected header is printed as HTML. Any other line has the ruby prefix
# and ruby annotations removed and is stored in @header.
def parse_header
  line = read_line
  unless line == ""
    [RUBY_PREFIX, PAT_RUBY].each { |pattern| line.gsub!(pattern, "") }
    return @header.push(line)
  end

  # an empty line is taken as the end of the header
  @section = :head_end
  @out.print @header.to_html
end
378
-
379
# Consumes one line of the note (chuuki) section.
#
# A line consisting only of "-" advances the state:
# :chuuki -> :chuuki_in -> :body. Other lines are discarded.
def parse_chuuki
  return unless read_line.match(/^\-+$/)

  case @section
  when :chuuki    then @section = :chuuki_in
  when :chuuki_in then @section = :body
  end
end
390
-
391
- # 使うべきではない文字があるかチェックする
392
- #
393
- # 警告を出力するだけで結果には影響を与えない。警告する文字は以下:
394
- #
395
- # * 1バイト文字
396
- # * `#`ではなく`♯`
397
- # * JIS(JIS X 0208)外字
398
- #
399
- # @return [void]
400
- #
401
# Warns about characters that should not appear in the text.
#
# Only prints warnings; the conversion result is not affected. The byte
# sequence of the character is rendered as a lowercase hex string and
# compared (as a string) against three groups:
#
# * listed one-byte codes           -> :warn_onebyte
# * the code "81f2"                 -> :warn_chuki
# * listed two-byte code ranges     -> :warn_jis_gaiji
#
# Non-String tokens are ignored.
#
# @param char token to inspect
# @param line line number used in the warning text
# @return [nil]
def illegal_char_check(char, line)
  return unless char.is_a?(String)

  code = char.unpack("H*").first
  in_any = ->(ranges) { ranges.any? { |low, high| code.between?(low, high) } }

  onebyte_codes = %w[21 23 5b 5d 3d 3f 2b]
  onebyte_ranges = [%w[a1 a5], %w[28 29], %w[7b 7d]]
  if onebyte_codes.include?(code) || in_any.call(onebyte_ranges)
    puts I18n.t(:warn_onebyte, line, char)
  end

  puts I18n.t(:warn_chuki, line, char) if code == "81f2"

  gaiji_ranges = [
    %w[81ad 81b7], %w[81c0 81c7], %w[81cf 81d9], %w[81e9 81ef],
    %w[81f8 81fb], %w[8240 824e], %w[8259 825f], %w[827a 8280],
    %w[829b 829e], %w[82f2 82fc], %w[8397 839e], %w[83b7 83be],
    %w[83d7 83fc], %w[8461 846f], %w[8492 849e], %w[84bf 84fc],
    %w[8540 85fc], %w[8640 86fc], %w[8740 87fc], %w[8840 889e],
    %w[9873 989e], %w[eaa5 eafc], %w[eb40 ebfc], %w[ec40 ecfc],
    %w[ed40 edfc], %w[ee40 eefc], %w[ef40 effc]
  ]
  puts I18n.t(:warn_jis_gaiji, line, char) if in_any.call(gaiji_ranges)
end
452
-
453
- # 本体解析部
454
- #
455
- # 1文字ずつ読み込み、dispatchして@buffer,@ruby_bufへしまう
456
- # 改行コードに当たったら溜め込んだものをgeneral_outputする
457
- #
458
# Body parser step.
#
# Reads one character, lets the matching handler replace it where the
# character starts a special notation, and then either flushes the line
# (on CRLF), protects the ruby buffer (on the ruby prefix), terminates (on
# the end character), does nothing (nil), or pushes the token into the
# buffers. Tokens that did not come from the accent parser are first
# checked by `illegal_char_check`.
def parse_body
  first = read_char
  needs_check = true
  token =
    case first
    when ACCENT_BEGIN
      needs_check = false
      read_accent
    when TEIHON_MARK[0]
      ending_check if @buffer.length == 0
      first
    when GAIJI_MARK
      dispatch_gaiji
    when COMMAND_BEGIN
      dispatch_aozora_command
    when KU
      assign_kunoji
      first
    when RUBY_BEGIN_MARK
      apply_ruby
    else
      first
    end

  case token
  when "\r\n"
    general_output
  when RUBY_PREFIX
    @ruby_buf.dump_into(@buffer)
    @ruby_buf.protected = true
  when @endchar
    # the file ended unexpectedly
    puts I18n.t(:warn_unexpected_terminator, line_number)
    throw :terminate
  when nil
    # noop
  else
    illegal_char_check(token, line_number) if needs_check
    push_chars(token)
  end
end
498
-
499
- # 本文が終了したかどうかチェックする
500
- #
501
- #
502
# Checks whether the body has ended.
#
# When the next two characters in the stream equal the second and third
# characters of TEIHON_MARK, the section switches to :tail, open blocks are
# verified via `ensure_close`, and the bibliographical-information div is
# opened in the output.
def ending_check
  second, third = TEIHON_MARK[1], TEIHON_MARK[2]
  return unless @stream.peek_char(0) == second && @stream.peek_char(1) == third

  @section = :tail
  ensure_close
  @out.print "</div>\r\n<div class=\"bibliographical_information\">\r\n<hr />\r\n<br />\r\n"
end
510
-
511
- # The original Aozora2Html#push_chars does not convert "'" into '&#39;'; this matches the old behavior
512
- # of CGI.escapeHTML().
513
- #
514
# Pushes a token, or every token of a nested collection, into the buffers.
#
# * Array: each element is pushed recursively.
# * String: pushed one character at a time; a one-character String first
#   has & " < > replaced by their HTML entities ("'" is left alone).
# * anything else: passed to `push_char` unchanged.
#
# @param obj [Array, String, Object] token(s) to push
def push_chars(obj)
  case obj
  when Array
    obj.each { |element| push_chars(element) }
  when String
    text = obj
    if text.length == 1
      text = text.gsub(/[&\"<>]/, {'&' => '&amp;', '"' => '&quot;', '<' => '&lt;', '>' => '&gt;'})
    end
    text.each_char { |c| push_char(c) }
  else
    push_char(obj)
  end
end
530
-
531
# Pushes a single token into the ruby buffer, flushing it to @buffer when
# the character type changes.
#
# * A :hankaku_terminate token following :hankaku text is appended to the
#   ruby buffer, whose type is then reset to :else.
# * While the ruby buffer is protected, or the token has the same (non
#   :else) type as the buffer, the token is appended to it.
# * Otherwise the ruby buffer is dumped into @buffer and restarted with
#   this token and its type.
#
# @param char token to push
def push_char(char)
  ctype = char_type(char)
  ends_hankaku = ctype == :hankaku_terminate && @ruby_buf.char_type == :hankaku

  if ends_hankaku
    if @ruby_buf.last_is_string?
      @ruby_buf.last_concat(char)
    else
      @ruby_buf.push(char)
    end
    @ruby_buf.char_type = :else
  elsif @ruby_buf.protected || (ctype != :else && ctype == @ruby_buf.char_type)
    if char.is_a?(String) && @ruby_buf.last_is_string?
      @ruby_buf.last_concat(char)
    else
      @ruby_buf.push(char)
      @ruby_buf.push("")
    end
  else
    @ruby_buf.dump_into(@buffer)
    @ruby_buf.clear(char)
    @ruby_buf.char_type = ctype
  end
end
553
-
554
- # 行出力時に@bufferが空かどうか調べる
555
- #
556
- # @bufferの中身によって行末の出力が異なるため
557
- #
558
- # @return [true, false, :inline] 空文字ではない文字列が入っていればfalse、1行注記なら:inline、それ以外しか入っていなければtrue
559
- #
560
# Inspects a line buffer at output time; the end-of-line output depends on
# the result.
#
# @param buf [Array] tokens of the line
# @return [true, false, :inline] false as soon as a non-empty String is
#   found, :inline as soon as a one-line indent tag is found, true when
#   neither occurs
def buf_is_blank?(buf)
  buf.each do |token|
    case token
    when String
      return false unless token == ""
    when Aozora2Html::Tag::OnelineIndent
      return :inline
    end
  end
  true
end
570
-
571
- # 行末で<br />を出力するべきかどうかの判別用
572
- #
573
- # @return [true, false] Multilineの注記しか入っていなければfalse、Multilineでも空文字でもない要素が含まれていればtrue
574
- #
575
# Decides whether "<br />" should be printed at the end of the line.
#
# @param buf [Array] tokens of the line
# @return [true, false] true as soon as an element is found that is
#   neither a Multiline tag nor an empty string; otherwise false when at
#   least one Multiline tag was seen, and true when none was
def terpri?(buf)
  saw_multiline = false
  buf.each do |item|
    if item.is_a?(Aozora2Html::Tag::Multiline)
      saw_multiline = true
    elsif item != ""
      return true
    end
  end
  !saw_multiline
end
588
-
589
- # 読み込んだ行の出力を行う
590
- #
591
- # parserが改行文字を読み込んだら呼ばれる。
592
- # 最終的に@ruby_bufと@bufferは初期化する
593
- #
594
- # @return [void]
595
- #
596
# Prints the line that has been collected so far.
#
# Called when the parser reads a line break. @ruby_buf and @buffer are
# reset in the process.
#
# @raise [Aozora2Html::Error] when an inline style is still open
# @return [void]
def general_output
  if @style_stack.last
    raise Aozora2Html::Error, I18n.t(:dont_crlf_in_style, @style_stack.last_command)
  end

  # When @noprint is set (e.g. the line held only an indent command),
  # nothing is printed for this line break.
  if @noprint
    @noprint = false
    return
  end

  @ruby_buf.dump_into(@buffer)
  line_tokens = @buffer
  @ruby_buf.clear
  @buffer = []
  closers = []

  blank = buf_is_blank?(line_tokens)
  want_br = terpri?(line_tokens) && @terprip
  @terprip = true

  # A String on the indent stack is the opening <div> of a hanging indent;
  # it is printed in front of lines that carry text.
  @out.print @indent_stack.last if @indent_stack.last.is_a?(String) && !blank

  line_tokens.each do |token|
    if token.is_a?(Aozora2Html::Tag::OnelineIndent)
      closers.unshift(token.close_tag)
    elsif token.is_a?(Aozora2Html::Tag::UnEmbedGaiji) && !token.escaped?
      # restore the gaiji mark that had been dropped
      @out.print GAIJI_MARK
    end
    @out.print token.to_s
  end

  # the line always ends with CRLF
  if @indent_stack.last.is_a?(String)
    # hanging indent: closing tags are always printed
    @out.print closers.map { |c| c.to_s }.join("")
    ending =
      if blank == :inline
        "\r\n"
      elsif blank && want_br
        "<br />\r\n"
      else
        "</div>\r\n"
      end
    @out.print ending
  elsif closers.empty? && want_br
    @out.print "<br />\r\n"
  else
    @out.print closers.map { |c| c.to_s }.join("")
    @out.print "\r\n"
  end
end
648
-
649
- # 前方参照の発見 Ruby,style重ねがけ等々のため、要素の配列で返す
650
- #
651
- # 前方参照は`○○[#「○○」に傍点]`、`吹喋[#「喋」に「ママ」の注記]`といった表記
652
# Finds the text that a front reference points at and removes it from the
# buffer.
#
# A front reference is a note that names text appearing just before it
# (the "target" quoted inside the note). The target is looked up at the
# tail of @ruby_buf (when it holds something) or of @buffer. Because ruby
# and styles may already be layered on the text, the match is returned as
# an array of buffer elements.
#
# Suffixes are compared with String#end_with?, so the target is matched
# literally and only at the very end of the buffered text.
#
# @param string [String] the referenced text
# @return [Array, false, nil] the removed elements in reading order, or a
#   falsy value when the text is not found (partially consumed elements are
#   put back)
def search_front_reference(string)
  return false if string.length == 0

  searching_buf = @ruby_buf.present? ? @ruby_buf.to_a : @buffer
  last_string = searching_buf.last

  if last_string.is_a?(String)
    if last_string == ""
      # skip empty placeholders
      searching_buf.pop
      search_front_reference(string)
    elsif last_string.end_with?(string)
      # full match: keep only the part in front of the target
      searching_buf.pop
      searching_buf.push(last_string[0, last_string.length - string.length])
      [string]
    elsif string.end_with?(last_string)
      # partial match: the rest of the target must precede this element
      tmp = searching_buf.pop
      found = search_front_reference(string[0, string.length - last_string.length])
      if found
        found + [tmp]
      else
        searching_buf.push(tmp)
        false
      end
    end
  elsif last_string.is_a?(Aozora2Html::Tag::ReferenceMentioned)
    inner = last_string.target_string
    if inner == string
      # full match
      searching_buf.pop
      [last_string]
    elsif string.end_with?(inner)
      # partial match
      tmp = searching_buf.pop
      found = search_front_reference(string[0, string.length - inner.length])
      if found
        found + [tmp]
      else
        searching_buf.push(tmp)
        false
      end
    end
  else
    false
  end
end
706
-
707
- # 発見した前方参照を元に戻す
708
- #
709
- # @ruby_bufがあれば@ruby_bufに、なければ@bufferにpushする
710
- # バッファの最後と各要素が文字列ならconcatし、どちらが文字列でなければ(concatできないので)pushする
711
- #
712
- # @return [void]
713
- #
714
# Puts the elements of a found front reference back into the buffers.
#
# For each element: when @ruby_buf holds something, the element goes there
# (concatenated onto a trailing String when both are Strings). Otherwise,
# when @buffer ends with a String, a String element is concatenated onto it
# and any other element is pushed to @buffer. In the remaining case the
# element is pushed to @ruby_buf.
#
# @param reference [Array] elements returned by `search_front_reference`
# @return [void]
def recovery_front_reference(reference)
  reference.each do |elt|
    textual = elt.is_a?(String)

    if @ruby_buf.present?
      if @ruby_buf.last_is_string? && textual
        @ruby_buf.last_concat(elt)
      else
        @ruby_buf.push(elt)
      end
    elsif !@buffer.last.is_a?(String)
      @ruby_buf.push(elt)
    elsif textual
      @buffer.last.concat(elt)
    else
      @buffer.push(elt)
    end
  end
end
734
-
735
# Tries to turn a gaiji note into an embedded gaiji image tag.
#
# The note is embedded when it contains a men-ku-ten code
# ("1-84-77" style, first part 1 or 2), is not marked as a non-0213 gaiji
# and does not match PAT_KUTEN_DUAL. In that case @chuuki_table[:newjis] is
# set and a Tag::EmbedGaiji is returned whose folder is "M-KK" and whose
# code is "M-KK-TT" (ku and ten zero-padded to two digits).
#
# The code parts are converted with to_i before formatting so that already
# zero-padded parts such as "08" are accepted, and the description is
# built with the non-destructive gsub so it is never nil.
#
# @param substring [String] the note text
# @return [Aozora2Html::Tag::EmbedGaiji, String] the tag, or `substring`
#   itself when the note cannot be embedded
def kuten2png(substring)
  desc = substring.gsub(PAT_KUTEN, "")
  match = desc.match(/[12]\-\d{1,2}\-\d{1,2}/)
  embeddable = match && !desc.match(NON_0213_GAIJI) && !desc.match(PAT_KUTEN_DUAL)
  return substring unless embeddable

  @chuuki_table[:newjis] = true
  men, ku, ten = match[0].split("-").map(&:to_i)
  folder = sprintf("%1d-%02d", men, ku)
  code = sprintf("%1d-%02d-%02d", men, ku, ten)
  Aozora2Html::Tag::EmbedGaiji.new(self, folder, code, desc.gsub(IGETA_MARK, ""))
end
748
-
749
# Registers a gaiji that cannot be embedded and returns its placeholder tag.
#
# The two captures of PAT_GAIJI are taken from the command; the second is
# appended to the @images entry keyed by the first, creating the entry
# when it does not exist yet.
#
# @param command [String] the note text
# @return [Aozora2Html::Tag::UnEmbedGaiji]
def escape_gaiji(command)
  kanji, line = command.match(PAT_GAIJI).to_a.values_at(1, 2)
  known = @images.assoc(kanji)
  if known
    known.push(line)
  else
    @images.push([kanji, line])
  end
  Aozora2Html::Tag::UnEmbedGaiji.new(self, command)
end
759
-
760
# Handles the character following a gaiji mark.
#
# Unless a command bracket follows, this is not a gaiji note and the mark
# itself is returned. Otherwise the note is read and, in order of
# preference, turned into an embedded image (`kuten2png`), an embedded
# Unicode gaiji (when the note carries a U+XXXX code and
# `EmbedGaiji.use_unicode` is truthy), or an un-embedded placeholder.
def dispatch_gaiji
  return GAIJI_MARK if @stream.peek_char(0) != COMMAND_BEGIN

  read_char # discard the opening bracket
  command, _raw = read_to_nest(COMMAND_END)

  embedded = kuten2png(command)
  return embedded if embedded != command

  if command.match(/U\+([0-9A-F]{4,5})/) && Aozora2Html::Tag::EmbedGaiji.use_unicode
    unicode_num = Regexp.last_match(1)
    Aozora2Html::Tag::EmbedGaiji.new(self, nil, nil, command, unicode_num)
  else
    # cannot be embedded
    escape_gaiji(command)
  end
end
781
-
782
- # 注記記法の場合分け
783
# Dispatches an annotation command that follows an opening bracket.
#
# Unless the next character is the igeta mark this is not an annotation
# and the bracket itself is returned. Otherwise the command is read up to
# the closing bracket and the first matching handler below is applied; the
# order of the checks is significant.
def dispatch_aozora_command
  return COMMAND_BEGIN if @stream.peek_char(0) != IGETA_MARK

  read_char # discard the igeta mark
  command, raw = read_to_nest(COMMAND_END)

  return apply_burasage(command) if command.match(ORIKAESHI_COMMAND)

  return exec_block_start_command(command) if command.start_with?(OPEN_MARK)
  return exec_block_end_command(command) if command.start_with?(CLOSE_MARK)

  return apply_warichu(command) if command.match(WARICHU_COMMAND)
  return apply_jisage(command) if command.match(JISAGE_COMMAND)
  return exec_img_command(command, raw) if command.match(/fig(\d)+_(\d)+\.png/)
  # avoid trying complex ruby -- fall back to notes
  return apply_rest_notes(command) if command.match(PAT_REST_NOTES)

  if command.end_with?(END_MARK)
    exec_inline_end_command(command)
    return nil
  end

  return exec_frontref_command(command) if command.match(PAT_REF)
  return apply_dakuten_katakana(command) if command.match(/1-7-8[2345]/)
  return Aozora2Html::Tag::Kaeriten.new(self, command) if command.match(PAT_KAERITEN)

  if command.match(PAT_OKURIGANA)
    return Aozora2Html::Tag::Okurigana.new(self, command.gsub!(PAT_REMOVE_OKURIGANA, ""))
  end

  return apply_chitsuki(command) if command.match(PAT_CHITSUKI)
  return nil if exec_inline_start_command(command)

  apply_rest_notes(command)
end
829
-
830
# Starts a hanging-indent (burasage) block.
#
# A previous block is closed implicitly first; if that produced output the
# current line is flushed without a trailing <br />. The opening <div> is
# not printed here: it is stored on the indent stack (with a dummy entry on
# the tag stack) and @noprint is always set.
#
# @param command [String] the annotation text
# @return [nil]
def apply_burasage(command)
  if implicit_close(:jisage)
    @terprip = false
    general_output
  end
  @noprint = true # always no print

  command = Utils.convert_japanese_number(command)
  tag =
    if command.match(TENTSUKI_COMMAND)
      width = command.match(PAT_ORIKAESHI_JISAGE)[1]
      '<div class="burasage" style="margin-left: ' + width + 'em; text-indent: -' + width + 'em;">'
    else
      left, indent = command.match(PAT_ORIKAESHI_JISAGE2).to_a[1, 2]
      left = left.to_i - indent.to_i
      "<div class=\"burasage\" style=\"margin-left: #{indent}em; text-indent: #{left}em;\">"
    end

  @indent_stack.push(tag)
  @tag_stack.push("") # dummy
  nil
end
851
-
852
# Extracts the digits in front of the jisage keyword from a command, after
# converting Japanese numerals via Utils.convert_japanese_number.
#
# @param command [String] the annotation text
# @return [String] the captured digits (may be empty)
def jisage_width(command)
  normalized = Utils.convert_japanese_number(command)
  normalized.match(/(\d*)(?:#{JISAGE_COMMAND})/)[1]
end
855
-
856
# Handles a jisage (indent) command.
#
# * A closing command closes the current jisage block.
# * A command that is alone on its line (empty @buffer, CRLF next) and is
#   not marked as one-line opens a multiline block.
# * Everything else becomes a one-line indent placed at the front of
#   @buffer.
#
# @param command [String] the annotation text
# @return [Aozora2Html::Tag::MultilineJisage, nil] the tag for a multiline
#   block, nil otherwise
def apply_jisage(command)
  if command.match(MADE_MARK) || command.match(END_MARK)
    # end of the indented block
    explicit_close(:jisage)
    @indent_stack.pop
    return nil
  end

  standalone = !command.match(ONELINE_COMMAND) &&
               @buffer.length == 0 &&
               @stream.peek_char(0) == "\r\n"
  unless standalone
    # applies to this line only
    @buffer.unshift(Aozora2Html::Tag::OnelineJisage.new(self, jisage_width(command)))
    return nil
  end

  # the line holds nothing but the command
  @terprip = false
  implicit_close(:jisage)
  @noprint = false # undo the flag that implicit_close may have set
  @indent_stack.push(:jisage)
  Aozora2Html::Tag::MultilineJisage.new(self, jisage_width(command))
end
879
-
880
# Opens or closes a warichu span.
#
# On the closing command a closing parenthesis is added unless the stream
# continues with one, followed by "</span>". On the opening command the
# span is opened and an opening parenthesis is added unless the text that
# was last in the ruby buffer already ends with one.
#
# @param command [String] the annotation text
# @return [nil]
def apply_warichu(command)
  if command.match(END_MARK)
    push_chars(PAREN_END_MARK) unless @stream.peek_char(0) == PAREN_END_MARK
    push_chars('</span>')
  else
    previous = @ruby_buf.last
    push_chars('<span class="warichu">')
    # evaluated after the span has been pushed, like the check it replaces
    already_open = previous.is_a?(String) && previous.end_with?(PAREN_BEGIN_MARK)
    push_chars(PAREN_BEGIN_MARK) unless already_open
  end
  nil
end
895
-
896
# Extracts the character count from a chitsuki/jiage command, after
# converting Japanese numerals via Utils.convert_japanese_number.
#
# @param command [String] the annotation text
# @return [String] the first capture of PAT_JI_LEN, or "0" when the
#   pattern does not match
def chitsuki_length(command)
  matched = Utils.convert_japanese_number(command).match(PAT_JI_LEN)
  matched ? matched[1] : "0"
end
904
-
905
# Handles a chitsuki / jiage command.
#
# A closing command (close mark + chitsuki or jiage keyword + end mark)
# closes the current chitsuki block. Otherwise a tag with the requested
# length is created; for the multiline form a previous block is closed
# implicitly and :chitsuki is pushed on the indent stack.
#
# @param string [String] the annotation text
# @param multiline [Boolean] true for the multiline form
# @return [Aozora2Html::Tag::MultilineChitsuki,
#   Aozora2Html::Tag::OnelineChitsuki, nil] nil for a closing command
def apply_chitsuki(string, multiline = false)
  keywords = [INDENT_TYPE[:chitsuki], JIAGE_COMMAND]
  if keywords.any? { |keyword| string.match(CLOSE_MARK + keyword + END_MARK) }
    explicit_close(:chitsuki)
    @indent_stack.pop
    return nil
  end

  len = chitsuki_length(string)
  # one line only
  return Aozora2Html::Tag::OnelineChitsuki.new(self, len) unless multiline

  # multiline form
  implicit_close(:chitsuki)
  @indent_stack.push(:chitsuki)
  Aozora2Html::Tag::MultilineChitsuki.new(self, len)
end
924
-
925
# Advances the heading counter and returns its new value.
#
# An Integer is used as the increment directly. Any other value is matched
# against the size marks: small adds 1, middle adds 10, large adds 100.
#
# @param size [Integer, String] increment or heading size text
# @raise [Aozora2Html::Error] when the size text matches no known mark
# @return [Integer] the updated counter
def new_midashi_id(size)
  step =
    case size
    when Integer then size
    when /#{SIZE_SMALL}/ then 1
    when /#{SIZE_MIDDLE}/ then 10
    when /#{SIZE_LARGE}/ then 100
    else
      raise Aozora2Html::Error, I18n.t(:undefined_header)
    end
  @midashi_id += step
end
943
-
944
# Opens a multiline heading block.
#
# The heading type is :dogyo or :mado when the command contains the
# corresponding mark, and :normal otherwise; for :normal headings the
# trailing <br /> of the line is suppressed.
#
# @param command [String] the annotation text
# @return [Aozora2Html::Tag::MultilineMidashi]
def apply_midashi(command)
  @indent_stack.push(:midashi)
  midashi_type =
    if command.match(DOGYO_MARK)
      :dogyo
    elsif command.match(MADO_MARK)
      :mado
    else
      @terprip = false
      :normal
    end
  Aozora2Html::Tag::MultilineMidashi.new(self, command, midashi_type)
end
956
-
957
# Opens a multiline yokogumi block: records :yokogumi on the indent stack
# and returns the block tag. The command text itself is not used.
#
# @param command [String] the annotation text (unused)
# @return [Aozora2Html::Tag::MultilineYokogumi]
def apply_yokogumi(command)
  @indent_stack << :yokogumi
  Aozora2Html::Tag::MultilineYokogumi.new(self)
end
961
-
962
# Opens a keigakomi block: records :keigakomi on the indent stack and
# returns the block tag. The command text itself is not used.
#
# @param command [String] the annotation text (unused)
# @return [Aozora2Html::Tag::Keigakomi]
def apply_keigakomi(command)
  @indent_stack << :keigakomi
  Aozora2Html::Tag::Keigakomi.new(self)
end
966
-
967
# Opens a multiline caption block: records :caption on the indent stack
# and returns the block tag. The command text itself is not used.
#
# @param command [String] the annotation text (unused)
# @return [Aozora2Html::Tag::MultilineCaption]
def apply_caption(command)
  @indent_stack << :caption
  Aozora2Html::Tag::MultilineCaption.new(self)
end
971
-
972
# Opens a jizume block.
#
# The digits in front of the jizume keyword are extracted (after
# converting Japanese numerals) before :jizume is recorded on the indent
# stack.
#
# @param command [String] the annotation text
# @return [Aozora2Html::Tag::Jizume]
def apply_jizume(command)
  normalized = Utils.convert_japanese_number(command)
  width = normalized.match(/(\d*)(?:#{INDENT_TYPE[:jizume]})/)[1]
  @indent_stack << :jizume
  Aozora2Html::Tag::Jizume.new(self, width)
end
977
-
978
# Pushes a block tag to the buffers and appends its closing tag to the
# given accumulator.
#
# @param tag object responding to `close_tag`
# @param closing accumulator that receives the closing tag via `concat`
# @return the value of `closing.concat`
def push_block_tag(tag, closing)
  push_chars(tag)
  closer = tag.close_tag
  closing.concat(closer)
end
982
-
983
# Classifies a font-size style text.
#
# @param style [String] the style text
# @return [Symbol] :sho when the text contains the "small" character,
#   :dai otherwise
def detect_style_size(style)
  style.match("小".to_sjis) ? :sho : :dai
end
990
-
991
- def exec_inline_start_command(command)
992
- case command
993
- when CHUUKI_COMMAND
994
- @style_stack.push([command,'</ruby>'])
995
- push_char('<ruby><rb>')
996
- when TCY_COMMAND
997
- @style_stack.push([command,'</span>'])
998
- push_char('<span dir="ltr">')
999
- when KEIGAKOMI_COMMAND
1000
- @style_stack.push([command,'</span>'])
1001
- push_chars('<span class="keigakomi">')
1002
- when YOKOGUMI_COMMAND
1003
- @style_stack.push([command,'</span>'])
1004
- push_chars('<span class="yokogumi">')
1005
- when CAPTION_COMMAND
1006
- @style_stack.push([command,'</span>'])
1007
- push_chars('<span class="caption">')
1008
- when WARIGAKI_COMMAND
1009
- @style_stack.push([command,'</span>'])
1010
- push_chars('<span class="warigaki">')
1011
- when OMIDASHI_COMMAND
1012
- @style_stack.push([command,'</a></h3>'])
1013
- @terprip = false
1014
- push_chars("<h3 class=\"o-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(100)}\">")
1015
- when NAKAMIDASHI_COMMAND
1016
- @style_stack.push([command,'</a></h4>'])
1017
- @terprip = false
1018
- push_chars("<h4 class=\"naka-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(10)}\">")
1019
- when KOMIDASHI_COMMAND
1020
- @style_stack.push([command,'</a></h5>'])
1021
- @terprip = false
1022
- push_chars("<h5 class=\"ko-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(1)}\">")
1023
- when DOGYO_OMIDASHI_COMMAND
1024
- @style_stack.push([command,'</a></h3>'])
1025
- push_chars("<h3 class=\"dogyo-o-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(100)}\">")
1026
- when DOGYO_NAKAMIDASHI_COMMAND
1027
- @style_stack.push([command,'</a></h4>'])
1028
- push_chars("<h4 class=\"dogyo-naka-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(10)}\">")
1029
- when DOGYO_KOMIDASHI_COMMAND
1030
- @style_stack.push([command,'</a></h5>'])
1031
- push_chars("<h5 class=\"dogyo-ko-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(1)}\">")
1032
- when MADO_OMIDASHI_COMMAND
1033
- @style_stack.push([command,'</a></h3>'])
1034
- push_chars("<h3 class=\"mado-o-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(100)}\">")
1035
- when MADO_NAKAMIDASHI_COMMAND
1036
- @style_stack.push([command,'</a></h4>'])
1037
- push_chars("<h4 class=\"mado-naka-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(10)}\">")
1038
- when MADO_KOMIDASHI_COMMAND
1039
- @style_stack.push([command,'</a></h5>'])
1040
- push_chars("<h5 class=\"mado-ko-midashi\"><a class=\"midashi_anchor\" id=\"midashi#{new_midashi_id(1)}\">")
1041
- when PAT_CHARSIZE
1042
- @style_stack.push([command,'</span>'])
1043
- _whole, nest, style = command.match(PAT_CHARSIZE).to_a
1044
- times = Utils.convert_japanese_number(nest).to_i
1045
- daisho = detect_style_size(style)
1046
- html_class = daisho.to_s + times.to_s
1047
- size = Utils.create_font_size(times, daisho)
1048
- push_chars("<span class=\"#{html_class}\" style=\"font-size: #{size};\">")
1049
- else
1050
- ## Decoration ##
1051
- key = command
1052
- filter = lambda{|x| x}
1053
- if command.match(PAT_DIRECTION)
1054
- _whole, dir, com = command.match(PAT_DIRECTION).to_a
1055
- # renew command
1056
- key = com
1057
- if command.match(TEN_MARK)
1058
- case dir
1059
- when LEFT_MARK, UNDER_MARK
1060
- filter = lambda{|x| x + "_after"}
1061
- end
1062
- elsif command.match(SEN_MARK)
1063
- case dir
1064
- when LEFT_MARK, OVER_MARK
1065
- filter = lambda{|x| x.sub("under","over")}
1066
- end
1067
- end
1068
- end
1069
-
1070
- found = COMMAND_TABLE[key]
1071
- # found = [class, tag]
1072
- if found
1073
- @style_stack.push([command,"</#{found[1]}>"])
1074
- push_chars("<#{found[1]} class=\"#{filter.call(found[0])}\">")
1075
- else
1076
- if $DEBUG
1077
- puts I18n.t(:warn_undefined_command, line_number, key)
1078
- end
1079
- nil
1080
- end
1081
- end
1082
- end
1083
-
1084
- def exec_inline_end_command(command)
1085
- encount = command.sub(END_MARK,"")
1086
- if encount == MAIN_MARK
1087
- # force to finish main_text
1088
- @section = :tail
1089
- ensure_close
1090
- @noprint = true
1091
- @out.print "</div>\r\n<div class=\"after_text\">\r\n<hr />\r\n"
1092
- elsif encount.match(CHUUKI_COMMAND) and @style_stack.last_command == CHUUKI_COMMAND
1093
- # special inline ruby
1094
- @style_stack.pop
1095
- _whole, ruby = encount.match(PAT_INLINE_RUBY).to_a
1096
- push_char("</rb><rp>(</rp><rt>".to_sjis + ruby + "</rt><rp>)</rp></ruby>".to_sjis)
1097
- elsif @style_stack.last_command.match(encount)
1098
- push_chars(@style_stack.pop[1])
1099
- else
1100
- raise Aozora2Html::Error, I18n.t(:invalid_nesting, encount, @style_stack.last_command)
1101
- end
1102
- end
1103
-
1104
- def exec_block_start_command(command)
1105
- original_command = command.dup
1106
- command.sub!(/^#{OPEN_MARK}/, "")
1107
- match = ""
1108
- if command.match(INDENT_TYPE[:jisage])
1109
- push_block_tag(apply_jisage(command),match)
1110
- elsif command.match(/(#{INDENT_TYPE[:chitsuki]}|#{JIAGE_COMMAND})$/)
1111
- push_block_tag(apply_chitsuki(command,true),match)
1112
- end
1113
-
1114
- if command.match(INDENT_TYPE[:midashi])
1115
- push_block_tag(apply_midashi(command),match)
1116
- end
1117
-
1118
- if command.match(INDENT_TYPE[:jizume])
1119
- if match != ""
1120
- @indent_stack.pop
1121
- end
1122
- push_block_tag(apply_jizume(command),match)
1123
- end
1124
-
1125
- if command.match(INDENT_TYPE[:yokogumi])
1126
- if match != ""
1127
- @indent_stack.pop
1128
- end
1129
- push_block_tag(apply_yokogumi(command),match)
1130
- end
1131
-
1132
- if command.match(INDENT_TYPE[:keigakomi])
1133
- if match != ""
1134
- @indent_stack.pop
1135
- end
1136
- push_block_tag(apply_keigakomi(command),match)
1137
- end
1138
-
1139
- if command.match(INDENT_TYPE[:caption])
1140
- if match != ""
1141
- @indent_stack.pop
1142
- end
1143
- push_block_tag(apply_caption(command),match)
1144
- end
1145
-
1146
- if command.match(INDENT_TYPE[:futoji])
1147
- if match != ""
1148
- @indent_stack.pop
1149
- end
1150
- push_block_tag(Aozora2Html::Tag::MultilineStyle.new(self, "futoji"),match)
1151
- @indent_stack.push(:futoji)
1152
- end
1153
- if command.match(INDENT_TYPE[:shatai])
1154
- if match != ""
1155
- @indent_stack.pop
1156
- end
1157
- push_block_tag(Aozora2Html::Tag::MultilineStyle.new(self, "shatai"),match)
1158
- @indent_stack.push(:shatai)
1159
- end
1160
-
1161
- if command.match(PAT_CHARSIZE)
1162
- _whole, nest, style = command.match(PAT_CHARSIZE).to_a
1163
- if match != ""
1164
- @indent_stack.pop
1165
- end
1166
- daisho = detect_style_size(style)
1167
- push_block_tag(Aozora2Html::Tag::FontSize.new(self,
1168
- Utils.convert_japanese_number(nest).to_i,
1169
- daisho),
1170
- match)
1171
- @indent_stack.push(daisho)
1172
- end
1173
-
1174
- if match == ""
1175
- apply_rest_notes(original_command)
1176
- else
1177
- @tag_stack.push(match)
1178
- nil
1179
- end
1180
- end
1181
-
1182
- # コマンド文字列からモードのシンボルを取り出す
1183
- #
1184
- # @return [Symbol]
1185
- #
1186
- def detect_command_mode(command)
1187
- if command.match(INDENT_TYPE[:chitsuki]+END_MARK) || command.match(JIAGE_COMMAND+END_MARK)
1188
- return :chitsuki
1189
- end
1190
- INDENT_TYPE.keys.each do |key|
1191
- if command.match(INDENT_TYPE[key])
1192
- return key
1193
- end
1194
- end
1195
- return nil
1196
- end
1197
-
1198
- def exec_block_end_command(command)
1199
- original_command = command.dup
1200
- command.sub!(/^#{CLOSE_MARK}/, "")
1201
- match = false
1202
- mode = detect_command_mode(command)
1203
- if mode
1204
- explicit_close(mode)
1205
- match = @indent_stack.pop
1206
- end
1207
-
1208
- if match
1209
- if !match.is_a?(String)
1210
- @terprip = false
1211
- end
1212
- nil
1213
- else
1214
- apply_rest_notes(original_command)
1215
- end
1216
- end
1217
-
1218
- def exec_img_command(command,raw)
1219
- match = raw.match(PAT_IMAGE)
1220
- if match
1221
- _whole, alt, src, _wh, width, height = match.to_a
1222
- css_class = if alt.match(PHOTO_COMMAND)
1223
- "photo"
1224
- else
1225
- "illustration"
1226
- end
1227
- Aozora2Html::Tag::Img.new(self, src, css_class, alt, width, height)
1228
- else
1229
- apply_rest_notes(command)
1230
- end
1231
- end
1232
-
1233
- def exec_frontref_command(command)
1234
- _whole, reference, spec1, spec2 = command.match(PAT_FRONTREF).to_a
1235
- if spec1
1236
- spec = spec1 + spec2
1237
- else
1238
- spec = spec2
1239
- end
1240
- if reference and found = search_front_reference(reference)
1241
- tmp = exec_style(found, spec)
1242
- if tmp
1243
- return tmp
1244
- else
1245
- recovery_front_reference(found)
1246
- end
1247
- end
1248
- # comment out?
1249
- apply_rest_notes(command)
1250
- end
1251
-
1252
- # 傍記を並べる用
1253
- #
1254
- def multiply(bouki, times)
1255
- sep = "&nbsp;"
1256
- ([bouki]*times).join(sep)
1257
- end
1258
-
1259
- # arrayがルビを含んでいればそのインデックスを返す
1260
- #
1261
- # @return [Integer, nil]
1262
- #
1263
- def include_ruby?(array)
1264
- array.index do |elt|
1265
- if elt.is_a?(Aozora2Html::Tag::Ruby)
1266
- true
1267
- elsif elt.is_a?(Aozora2Html::Tag::ReferenceMentioned)
1268
- if elt.target.is_a?(Array)
1269
- include_ruby?(elt.target)
1270
- else
1271
- elt.target.is_a?(Aozora2Html::Tag::Ruby)
1272
- end
1273
- end
1274
- end
1275
- end
1276
-
1277
- # rubyタグの再生成(本体はrearrange_ruby)
1278
- #
1279
- # complex ruby wrap up utilities -- don't erase! we will use soon ...
1280
- #
1281
- def rearrange_ruby_tag(targets, upper_ruby, under_ruby = "")
1282
- target, upper, under = rearrange_ruby(targets, upper_ruby, under_ruby)
1283
- Aozora2Html::Tag::Ruby.new(self, target, upper, under)
1284
- end
1285
-
1286
- # rubyタグの再割り当て
1287
- def rearrange_ruby(targets, upper_ruby, under_ruby = "")
1288
- if include_ruby?(targets)
1289
- new_targets = []
1290
- new_upper = if upper_ruby != ""
1291
- upper_ruby
1292
- else
1293
- []
1294
- end
1295
- new_under = if under_ruby != ""
1296
- under_ruby
1297
- else
1298
- []
1299
- end
1300
- if new_upper.length > 1 and new_under.length > 1
1301
- raise Aozora2Html::Error, I18n.t(:dont_allow_triple_ruby)
1302
- end
1303
-
1304
- targets.each{|x|
1305
- if x.is_a?(Aozora2Html::Tag::Ruby)
1306
- if x.target.is_a?(Array)
1307
- # inner Aozora2Html::Tag::Ruby is already complex ... give up
1308
- raise Aozora2Html::Error, I18n.t(:dont_use_double_ruby)
1309
- else
1310
- if x.ruby != ""
1311
- if new_upper.is_a?(Array)
1312
- new_upper.push(x.ruby)
1313
- else
1314
- raise Aozora2Html::Error, I18n.t(:dont_use_double_ruby)
1315
- end
1316
- else
1317
- if new_under.is_a?(Array)
1318
- new_under.push(x.under_ruby)
1319
- else
1320
- raise Aozora2Html::Error, I18n.t(:dont_use_double_ruby)
1321
- end
1322
- end
1323
- new_targets.push(x.target)
1324
- end
1325
- elsif x.is_a?(Aozora2Html::Tag::ReferenceMentioned)
1326
- if x.target.is_a?(Array)
1327
- # recursive
1328
- tar,up,un = rearrange_ruby(x.target, "", "")
1329
- # rotation!!
1330
- tar.each{|y|
1331
- tmp = x.dup
1332
- tmp.target = y
1333
- new_targets.push(tmp)}
1334
- if new_under.is_a?(Array)
1335
- new_under.concat(un)
1336
- elsif un.to_s.length > 0
1337
- raise Aozora2Html::Error, I18n.t(:dont_use_double_ruby)
1338
- end
1339
- if new_upper.is_a?(Array)
1340
- new_upper.concat(up)
1341
- elsif up.to_s.length > 0
1342
- raise Aozora2Html::Error, I18n.t(:dont_use_double_ruby)
1343
- end
1344
- else
1345
- new_targets.push(x)
1346
- if new_under.is_a?(Array)
1347
- new_under.push("")
1348
- end
1349
- if new_upper.is_a?(Array)
1350
- new_upper.push("")
1351
- end
1352
- end
1353
- else
1354
- new_targets.push(x)
1355
- if new_under.is_a?(Array)
1356
- new_under.push("")
1357
- end
1358
- if new_upper.is_a?(Array)
1359
- new_upper.push("")
1360
- end
1361
- end
1362
- }
1363
- [new_targets, new_upper, new_under]
1364
- else
1365
- [targets, upper_ruby, under_ruby]
1366
- end
1367
- end
1368
-
1369
- def exec_style(targets, command)
1370
- try_kuten = kuten2png(command)
1371
- if try_kuten != command
1372
- try_kuten
1373
- elsif command.match(TCY_COMMAND)
1374
- Aozora2Html::Tag::Dir.new(self, targets)
1375
- elsif command.match(YOKOGUMI_COMMAND)
1376
- Aozora2Html::Tag::InlineYokogumi.new(self, targets)
1377
- elsif command.match(KEIGAKOMI_COMMAND)
1378
- Aozora2Html::Tag::InlineKeigakomi.new(self, targets)
1379
- elsif command.match(CAPTION_COMMAND)
1380
- Aozora2Html::Tag::InlineCaption.new(self, targets)
1381
- elsif command.match(KAERITEN_COMMAND)
1382
- Aozora2Html::Tag::Kaeriten.new(self, targets)
1383
- elsif command.match(KUNTEN_OKURIGANA_COMMAND)
1384
- Aozora2Html::Tag::Okurigana.new(self, targets)
1385
- elsif command.match(MIDASHI_COMMAND)
1386
- midashi_type = :normal
1387
- if command.match(DOGYO_MARK)
1388
- midashi_type = :dogyo
1389
- elsif command.match(MADO_MARK)
1390
- midashi_type = :mado
1391
- else
1392
- @terprip = false
1393
- end
1394
- Aozora2Html::Tag::Midashi.new(self, targets, command, midashi_type)
1395
- elsif command.match(PAT_CHARSIZE)
1396
- _whole, nest, style = command.match(PAT_CHARSIZE).to_a
1397
- Aozora2Html::Tag::InlineFontSize.new(self,targets,
1398
- Utils.convert_japanese_number(nest).to_i,
1399
- detect_style_size(style))
1400
- elsif command.match(PAT_RUBY_DIR)
1401
- _whole, _dir, under = command.match(PAT_RUBY_DIR).to_a
1402
- if targets.length == 1 and targets[0].is_a?(Aozora2Html::Tag::Ruby)
1403
- tag = targets[0]
1404
- if tag.under_ruby == ""
1405
- tag.under_ruby = under
1406
- tag
1407
- else
1408
- raise Aozora2Html::Error, I18n.t(:dont_allow_triple_ruby)
1409
- end
1410
- else
1411
- rearrange_ruby_tag(targets, "", under)
1412
- end
1413
- elsif command.match(PAT_CHUUKI)
1414
- rearrange_ruby_tag(targets, PAT_CHUUKI.match(command).to_a[1])
1415
- elsif command.match(PAT_BOUKI)
1416
- rearrange_ruby_tag(targets, multiply(PAT_BOUKI.match(command).to_a[1], targets.to_s.length))
1417
- else
1418
- ## direction fix! ##
1419
- filter = lambda{|x| x}
1420
- if command.match(PAT_DIRECTION)
1421
- _whole, dir, com = command.match(PAT_DIRECTION).to_a
1422
- # renew command
1423
- command = com
1424
- if command.match(TEN_MARK)
1425
- case dir
1426
- when LEFT_MARK, UNDER_MARK
1427
- filter = lambda{|x| x + "_after"}
1428
- end
1429
- elsif command.match(SEN_MARK)
1430
- case dir
1431
- when LEFT_MARK, OVER_MARK
1432
- filter = lambda{|x| x.sub("under","over")}
1433
- end
1434
- end
1435
- end
1436
-
1437
- found = COMMAND_TABLE[command]
1438
- # found = [class, tag]
1439
- if found
1440
- Aozora2Html::Tag::Decorate.new(self, targets, filter.call(found[0]), found[1])
1441
- else
1442
- nil
1443
- end
1444
- end
1445
- end
1446
-
1447
- def apply_dakuten_katakana(command)
1448
- n = command.match(/1-7-8([2345])/).to_a[1]
1449
- frontref = DAKUTEN_KATAKANA_TABLE[n]
1450
- if found = search_front_reference(frontref)
1451
- Aozora2Html::Tag::DakutenKatakana.new(self, n,found.join)
1452
- else
1453
- apply_rest_notes(command)
1454
- end
1455
- end
1456
-
1457
- # くの字点の処理
1458
- #
1459
- # くの字点は現状そのまま出力するのでフッタの「表記について」で出力するかどうかのフラグ処理だけ行う
1460
- def assign_kunoji
1461
- second = @stream.peek_char(0)
1462
- case second
1463
- when NOJI
1464
- @chuuki_table[:kunoji] = true
1465
- when DAKUTEN
1466
- if @stream.peek_char(1) == NOJI
1467
- @chuuki_table[:dakutenkunoji] = true
1468
- end
1469
- end
1470
- end
1471
-
1472
- def apply_rest_notes(command)
1473
- @chuuki_table[:chuki] = true
1474
- Aozora2Html::Tag::EditorNote.new(self, command)
1475
- end
1476
-
1477
- # |が来たときは文字種を無視してruby_bufを守らなきゃいけない
1478
- def apply_ruby
1479
- @ruby_buf.protected = nil
1480
- ruby, _raw = read_to_nest(RUBY_END_MARK)
1481
- if ruby.length == 0
1482
- # escaped ruby character
1483
- return RUBY_BEGIN_MARK+RUBY_END_MARK
1484
- end
1485
- ans = ""
1486
- notes = []
1487
- @ruby_buf.each do |token|
1488
- if token.is_a?(Aozora2Html::Tag::UnEmbedGaiji)
1489
- ans.concat(GAIJI_MARK)
1490
- token.escape!
1491
- notes.push(token)
1492
- else
1493
- ans.concat(token.to_s)
1494
- end
1495
- end
1496
- @buffer.push(Aozora2Html::Tag::Ruby.new(self, ans, ruby))
1497
- @buffer += notes
1498
- @ruby_buf.clear
1499
- nil
1500
- end
1501
-
1502
- # parse_bodyのフッタ版
1503
- def parse_tail
1504
- char = read_char
1505
- check = true
1506
- case char
1507
- when ACCENT_BEGIN
1508
- check = false
1509
- char = read_accent
1510
- when @endchar
1511
- throw :terminate
1512
- when GAIJI_MARK
1513
- char = dispatch_gaiji
1514
- when COMMAND_BEGIN
1515
- char = dispatch_aozora_command
1516
- when KU
1517
- assign_kunoji
1518
- when RUBY_BEGIN_MARK
1519
- char = apply_ruby
1520
- end
1521
-
1522
- case char
1523
- when "\r\n"
1524
- tail_output
1525
- when RUBY_PREFIX
1526
- @ruby_buf.dump_into(@buffer)
1527
- @ruby_buf.protected = true
1528
- when nil
1529
- # noop
1530
- else
1531
- if check
1532
- illegal_char_check(char, line_number)
1533
- end
1534
- push_chars(char)
1535
- end
1536
- end
1537
-
1538
- # general_outputのフッタ版
1539
- def tail_output
1540
- @ruby_buf.dump_into(@buffer)
1541
- string = @buffer.join
1542
- @ruby_buf.clear
1543
- @buffer = []
1544
- string.gsub!("info@aozora.gr.jp",'<a href="mailto: info@aozora.gr.jp">info@aozora.gr.jp</a>')
1545
- string.gsub!("青空文庫(http://www.aozora.gr.jp/)".to_sjis){"<a href=\"http://www.aozora.gr.jp/\">#{$&}</a>"}
1546
- if string.match(/(<br \/>$|<\/p>$|<\/h\d>$|<div.*>$|<\/div>$|^<[^>]*>$)/)
1547
- @out.print string, "\r\n"
1548
- else
1549
- @out.print string, "<br />\r\n"
1550
- end
1551
- end
1552
-
1553
- # `●表記について`で使用した注記等を出力する
1554
- def hyoki
1555
- # <br /> times fix
1556
- @out.print "<br />\r\n</div>\r\n<div class=\"notation_notes\">\r\n<hr />\r\n<br />\r\n●表記について<br />\r\n<ul>\r\n".to_sjis
1557
- @out.print "\t<li>このファイルは W3C 勧告 XHTML1.1 にそった形式で作成されています。</li>\r\n".to_sjis
1558
- if @chuuki_table[:chuki]
1559
- @out.print "\t<li>[#…]は、入力者による注を表す記号です。</li>\r\n".to_sjis
1560
- end
1561
- if @chuuki_table[:kunoji]
1562
- if @chuuki_table[:dakutenkunoji]
1563
- @out.printf("\t<li>「くの字点」は「%s」で、「濁点付きくの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU+NOJI, KU+DAKUTEN+NOJI)
1564
- else
1565
- @out.printf("\t<li>「くの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU+NOJI)
1566
- end
1567
- elsif @chuuki_table[:dakutenkunoji]
1568
- @out.printf("\t<li>「濁点付きくの字点」は「%s」で表しました。</li>\r\n".to_sjis, KU+DAKUTEN+NOJI)
1569
- end
1570
- if @chuuki_table[:newjis] && !Aozora2Html::Tag::EmbedGaiji.use_jisx0213
1571
- @out.print "\t<li>「くの字点」をのぞくJIS X 0213にある文字は、画像化して埋め込みました。</li>\r\n".to_sjis
1572
- end
1573
- if @chuuki_table[:accent] && !Aozora2Html::Tag::Accent.use_jisx0213
1574
- @out.print "\t<li>アクセント符号付きラテン文字は、画像化して埋め込みました。</li>\r\n".to_sjis
1575
- end
1576
- if @images[0]
1577
- @out.print "\t<li>この作品には、JIS X 0213にない、以下の文字が用いられています。(数字は、底本中の出現「ページ-行」数。)これらの文字は本文内では「※[#…]」の形で示しました。</li>\r\n</ul>\r\n<br />\r\n\t\t<table class=\"gaiji_list\">\r\n".to_sjis
1578
- @images.each{|cell|
1579
- k,*v = cell
1580
- vs = v.join("、".to_sjis)
1581
- @out.print " <tr>
1582
- <td>
1583
- #{k}
1584
- </td>
1585
- <td>&nbsp;&nbsp;</td>
1586
- <td>
1587
- #{vs} </td>
1588
- <!--
1589
- <td>
1590
- <img src=\"../../../gaiji/others/xxxx.png\" alt=\"#{k}\" width=32 height=32 />
1591
- </td>
1592
- -->
1593
- </tr>
1594
- ".to_sjis
1595
- }
1596
- @out.print "\t\t</table>\r\n".to_sjis
1597
- else
1598
- @out.print "</ul>\r\n" # <ul>内に<li>以外のエレメントが来るのは不正なので修正
1599
- end
1600
- @out.print "</div>\r\n"
1601
- end
1602
- end
1603
-
1604
- if $0 == __FILE__
1605
- # todo: 引数チェックとか
1606
- Aozora2Html.new($*[0],$*[1]).process
1607
- end