aozora2html 2.0.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rubocop.yml +19 -0
  3. data/.github/workflows/ruby.yml +4 -1
  4. data/.rubocop.yml +36 -152
  5. data/.rubocop_todo.yml +7 -0
  6. data/CHANGELOG.md +26 -0
  7. data/Gemfile +2 -0
  8. data/Guardfile +3 -1
  9. data/HACKING.md +45 -0
  10. data/README.md +14 -6
  11. data/Rakefile +12 -5
  12. data/aozora2html.gemspec +24 -22
  13. data/bin/aozora2html +21 -19
  14. data/lib/aozora2html/accent_parser.rb +62 -54
  15. data/lib/aozora2html/error.rb +5 -4
  16. data/lib/aozora2html/header.rb +20 -18
  17. data/lib/aozora2html/i18n.rb +40 -20
  18. data/lib/aozora2html/ruby_buffer.rb +63 -28
  19. data/lib/aozora2html/string_refinements.rb +36 -0
  20. data/lib/aozora2html/style_stack.rb +6 -0
  21. data/lib/aozora2html/tag/accent.rb +10 -12
  22. data/lib/aozora2html/tag/block.rb +11 -9
  23. data/lib/aozora2html/tag/chitsuki.rb +6 -2
  24. data/lib/aozora2html/tag/dakuten_katakana.rb +10 -8
  25. data/lib/aozora2html/tag/decorate.rb +4 -3
  26. data/lib/aozora2html/tag/dir.rb +4 -2
  27. data/lib/aozora2html/tag/editor_note.rb +7 -4
  28. data/lib/aozora2html/tag/embed_gaiji.rb +15 -11
  29. data/lib/aozora2html/tag/font_size.rb +5 -2
  30. data/lib/aozora2html/tag/gaiji.rb +4 -3
  31. data/lib/aozora2html/tag/img.rb +4 -4
  32. data/lib/aozora2html/tag/indent.rb +3 -3
  33. data/lib/aozora2html/tag/inline.rb +10 -7
  34. data/lib/aozora2html/tag/inline_caption.rb +4 -2
  35. data/lib/aozora2html/tag/inline_font_size.rb +4 -3
  36. data/lib/aozora2html/tag/inline_keigakomi.rb +4 -2
  37. data/lib/aozora2html/tag/inline_yokogumi.rb +4 -3
  38. data/lib/aozora2html/tag/jisage.rb +3 -1
  39. data/lib/aozora2html/tag/jizume.rb +3 -0
  40. data/lib/aozora2html/tag/kaeriten.rb +4 -2
  41. data/lib/aozora2html/tag/keigakomi.rb +15 -9
  42. data/lib/aozora2html/tag/kunten.rb +4 -4
  43. data/lib/aozora2html/tag/midashi.rb +3 -1
  44. data/lib/aozora2html/tag/multiline.rb +3 -0
  45. data/lib/aozora2html/tag/multiline_caption.rb +6 -8
  46. data/lib/aozora2html/tag/multiline_chitsuki.rb +3 -1
  47. data/lib/aozora2html/tag/multiline_jisage.rb +3 -1
  48. data/lib/aozora2html/tag/multiline_midashi.rb +6 -3
  49. data/lib/aozora2html/tag/multiline_style.rb +5 -3
  50. data/lib/aozora2html/tag/multiline_yokogumi.rb +6 -9
  51. data/lib/aozora2html/tag/okurigana.rb +4 -2
  52. data/lib/aozora2html/tag/oneline_chitsuki.rb +3 -2
  53. data/lib/aozora2html/tag/oneline_indent.rb +8 -1
  54. data/lib/aozora2html/tag/oneline_jisage.rb +3 -0
  55. data/lib/aozora2html/tag/reference_mentioned.rb +22 -21
  56. data/lib/aozora2html/tag/ruby.rb +174 -70
  57. data/lib/aozora2html/tag/un_embed_gaiji.rb +8 -2
  58. data/lib/aozora2html/tag.rb +40 -38
  59. data/lib/aozora2html/tag_parser.rb +23 -16
  60. data/lib/aozora2html/text_buffer.rb +50 -0
  61. data/lib/aozora2html/utils.rb +113 -50
  62. data/lib/aozora2html/version.rb +3 -1
  63. data/lib/aozora2html/yaml_loader.rb +8 -2
  64. data/lib/aozora2html/zip.rb +4 -0
  65. data/lib/aozora2html.rb +1358 -3
  66. data/lib/extensions.rb +2 -34
  67. data/lib/jstream.rb +96 -25
  68. data/sample/chukiichiran_kinyurei.html +15 -2
  69. data/sample/chukiichiran_kinyurei.txt +15 -2
  70. data/test/test_aozora2html.rb +137 -148
  71. data/test/test_aozora_accent_parser.rb +26 -9
  72. data/test/test_command_parse.rb +25 -22
  73. data/test/test_compat.rb +3 -4
  74. data/test/test_dakuten_katakana_tag.rb +10 -12
  75. data/test/test_decorate_tag.rb +9 -6
  76. data/test/test_dir_tag.rb +9 -6
  77. data/test/test_editor_note_tag.rb +8 -5
  78. data/test/test_exception.rb +10 -8
  79. data/test/test_font_size_tag.rb +16 -13
  80. data/test/test_gaiji_tag.rb +15 -14
  81. data/test/test_header.rb +25 -40
  82. data/test/test_helper.rb +3 -1
  83. data/test/test_i18n.rb +22 -6
  84. data/test/test_img_tag.rb +9 -5
  85. data/test/test_inline_caption_tag.rb +9 -6
  86. data/test/test_inline_font_size_tag.rb +13 -10
  87. data/test/test_inline_keigakomi_tag.rb +9 -6
  88. data/test/test_inline_yokogumi_tag.rb +9 -6
  89. data/test/test_jizume_tag.rb +9 -7
  90. data/test/test_jstream.rb +33 -30
  91. data/test/test_kaeriten_tag.rb +9 -6
  92. data/test/test_keigakomi_tag.rb +11 -9
  93. data/test/test_midashi_tag.rb +15 -14
  94. data/test/test_multiline_caption_tag.rb +7 -5
  95. data/test/test_multiline_midashi_tag.rb +24 -25
  96. data/test/test_multiline_style_tag.rb +9 -7
  97. data/test/test_multiline_yokogumi_tag.rb +7 -5
  98. data/test/test_okurigana_tag.rb +9 -6
  99. data/test/test_ruby_parse.rb +14 -14
  100. data/test/test_ruby_tag.rb +9 -6
  101. data/test/test_tag_parser.rb +28 -26
  102. metadata +60 -14
  103. data/.travis.yml +0 -12
  104. data/lib/t2hs.rb +0 -1607
data/lib/extensions.rb CHANGED
@@ -1,38 +1,6 @@
1
- # encoding: utf-8
2
- # 1.8 like to_s method to Array
3
- class Array
4
- def to_s
5
- self.join
6
- end
7
- end
8
-
9
- class String
10
-
11
- # used in Aozora2Html#char_type
12
- def char_type
13
- ch = self
14
- if ch.match(Regexp.new("[ぁ-んゝゞ]".encode("shift_jis")))
15
- :hiragana
16
- elsif ch.match(Regexp.new("[ァ-ンーヽヾヴ]".encode("shift_jis")))
17
- :katakana
18
- elsif ch.match(Regexp.new("[0-9A-Za-zΑ-Ωα-ωА-Яа-я−&’,.]".encode("shift_jis")))
19
- :zenkaku
20
- elsif ch.match(Regexp.new("[A-Za-z0-9#\\-\\&'\\,]".encode("shift_jis")))
21
- :hankaku
22
- elsif ch.match(Regexp.new("[亜-熙々※仝〆〇ヶ]".encode("shift_jis")))
23
- :kanji
24
- elsif ch.match(/[\.\;\"\?\!\)]/)
25
- :hankaku_terminate
26
- else
27
- :else
28
- end
29
- end
30
-
31
- def to_sjis
32
- self.encode("shift_jis")
33
- end
34
- end
1
+ # frozen_string_literal: true
35
2
 
3
+ # Kernel extension
36
4
  module Kernel
37
5
  alias original_kernel_puts puts
38
6
 
data/lib/jstream.rb CHANGED
@@ -1,5 +1,7 @@
1
- require "aozora2html/error"
2
- require "aozora2html/i18n"
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'aozora2html/error'
4
+ require_relative 'aozora2html/i18n'
3
5
 
4
6
  ##
5
7
  # Stream class for reading a file.
@@ -9,60 +11,129 @@ require "aozora2html/i18n"
9
11
  # when found line terminator except CR+LF, exit.
10
12
  #
11
13
  class Jstream
14
+ CR = "\r"
15
+ LF = "\n"
16
+ CRLF = CR + LF
12
17
 
13
- attr_accessor :line
14
-
18
+ # 初期化と同時に、いったん最初の行をscanして、改行コードがCR+LFかどうか調べる。
19
+ # CR+LFでない場合はエラーメッセージを出力してexitする(!)
20
+ #
21
+ # TODO: 将来的にはさすがにexitまではしないよう、仕様を変更する?
15
22
  def initialize(file_io)
16
23
  @line = 0
17
- @entry = false
24
+ @current_char = nil
18
25
  @file = file_io
26
+
19
27
  begin
20
- store_to_buffer
28
+ tmp = @file.readline.chomp!("\r\n")
29
+ raise Aozora2Html::Error, Aozora2Html::I18n.t(:use_crlf) unless tmp
21
30
  rescue Aozora2Html::Error => e
22
31
  puts e.message(1)
23
32
  if e.is_a?(Aozora2Html::Error)
24
33
  exit(2)
25
34
  end
35
+ ensure
36
+ @file.rewind
26
37
  end
27
38
  end
28
39
 
29
40
  def inspect
30
- "#<jcode-stream input " + @file.inspect + ">"
41
+ "#<jcode-stream input #{@file.inspect}>"
31
42
  end
32
43
 
44
+ # 1文字読み込んで返す
45
+ #
46
+ # 行末の場合は(1文字ではなく)CR+LFを返す
47
+ # EOFまで到達すると :eof というシンボルを返す
48
+ #
49
+ # TODO: EOFの場合はnilを返すように変更する?
33
50
  def read_char
34
- found = @buffer.shift
35
- if @entry
51
+ char = @file.getc
52
+
53
+ if char == CR
54
+ char2 = @file.getc
55
+ if char2 != LF
56
+ raise Aozora2Html::Error, Aozora2Html::I18n.t(:use_crlf)
57
+ end
58
+
36
59
  @line += 1
37
- @entry = false
38
- end
39
- if found
40
- return found
60
+ @current_char = char + char2
61
+ elsif char.nil?
62
+ @current_char = :eof
63
+ else
64
+ @current_char = char
41
65
  end
42
66
 
67
+ @current_char
68
+ end
69
+
70
+ # pos個分の文字を先読みし、最後の文字を返す
71
+ #
72
+ # ファイルディスクリプタは移動しない(実行前の位置まで戻す)
73
+ # 行末の場合は(1文字ではなく)CR+LFを返す
74
+ # 行末の先に進んだ場合の挙動は未定義になる
75
+ def peek_char(pos)
76
+ original_pos = @file.pos
77
+ char = nil
78
+
43
79
  begin
44
- store_to_buffer
45
- rescue EOFError
46
- @buffer = [:eof]
80
+ pos.times { read_char }
81
+
82
+ char = @file.getc
83
+ if char == CR
84
+ char2 = @file.getc
85
+ if char2 != LF
86
+ raise Aozora2Html::Error, Aozora2Html::I18n.t(:use_crlf)
87
+ end
88
+
89
+ char += char2
90
+ end
91
+ ensure
92
+ @file.seek(original_pos)
47
93
  end
48
- "\r\n"
94
+
95
+ char
49
96
  end
50
97
 
51
- def peek_char(pos)
52
- @buffer[pos] || "\r\n"
98
+ # 指定された終端文字(1文字のStringかCRLF)まで読み込む
99
+ #
100
+ # @param [String] endchar 終端文字
101
+ def read_to(endchar)
102
+ buf = +''
103
+ loop do
104
+ char = read_char
105
+ break if char == endchar
106
+
107
+ if char.is_a?(Symbol)
108
+ print endchar
109
+ end
110
+ buf.concat(char)
111
+ end
112
+ buf
113
+ end
114
+
115
+ # 1行読み込み
116
+ #
117
+ # @return [String] 読み込んだ文字列を返す
118
+ #
119
+ def read_line
120
+ read_to("\r\n")
53
121
  end
54
122
 
55
123
  def close
56
124
  @file.close
57
125
  end
58
126
 
59
- private
60
- def store_to_buffer
61
- if tmp = @file.readline.chomp!("\r\n")
62
- @buffer = tmp.each_char.to_a
127
+ # 現在の行数を返す
128
+ #
129
+ # 何も読み込む前は0、読み込み始めの最初の文字から\r\nまでが1、その次の文字から次の\r\nは2、……といった値になる
130
+ def line
131
+ if @file.pos == 0
132
+ 0
133
+ elsif @current_char == CRLF
134
+ @line
63
135
  else
64
- raise Aozora2Html::Error, Aozora2Html::I18n.t(:use_crlf)
136
+ @line + 1
65
137
  end
66
- @entry = true
67
138
  end
68
139
  end
@@ -426,6 +426,10 @@ presqu'<img src="../../../gaiji/1-09/1-09-68.png" alt="
426
426
  <br />
427
427
  �@�\�\�\�\�\�\�\�\�\�\<br />
428
428
  <br />
429
+ �u����ɍ��̂Ƃ��늸���ĕa�I�Ȑ����@���Ɛ���Ȑ����@������ʂ��ׂ��ł���Ƃ��Ă���̂��낤���H����͓|�ꂽ�ƂŐ�������͊w�@���Ɨ����Ă���Ƃ̗͊w�@������ʂ���悤�Ȃ��̂ł���B�v�i&quot;Et maintenant oserait-on soutenir q<img src="../../../gaiji/1-09/1-09-79.png" alt="��(�A�L���[�g�A�N�Z���g�t��U������)" class="gaiji" />il faut distinguer les lois de la vie <img src="../../../gaiji/1-09/1-09-54.png" alt="��(�O���[�u�A�N�Z���g�t��A������)" class="gaiji" /> l'<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />tat pathologique des lois de la vie <img src="../../../gaiji/1-09/1-09-54.png" alt="��(�O���[�u�A�N�Z���g�t��A������)" class="gaiji" /> l'<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />tat normal? Ce serait vouloir distinguer les lois de la m<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />canique dans une maison qui tombe, des lois do la m<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />canique dans une maison qui tient debout.&quot;�j<br />
430
+ <br />
431
+ �@�\�\�\�\�\�\�\�\�\�\<br />
432
+ <br />
429
433
  ���P�_<br />
430
434
  <br />
431
435
  ���Ԃ�_<br />
@@ -756,6 +760,10 @@ Which, teaching us, hath this exordium: <span class="shatai">Nothing from nothin
756
760
  <br />
757
761
  <ruby><rb>��󕶌�</rb><rp>�i</rp><rt>��������Ԃ�</rt><rp>�j</rp></ruby><span class="notes">�m���u��󕶌Ɂv�̍��Ɂu�}�}�v�̒��L�n</span><br />
758
762
  <br />
763
+ �@�\�\�\�\�\�\�\�\�\�\<br />
764
+ <br />
765
+ �N�`�����m�I���b�`���n�A<ruby><rb>�H��</rb><rp>�i</rp><rt>�~&nbsp;�~</rt><rp>�j</rp></ruby>�f<em class="sesame_dot">���X��</em>���g�C�f�C���E�`�j�A�O���O���}���b�e����<em class="sesame_dot">�g��</em>�K�J�P�e�g���f�L�e�A�\���K���l�j�A�^�b�e�A�^�I���e�ƃw�n�R�o���e�L�^�m�B<br />
766
+ <br />
759
767
  ���c�g�ݒ��ʼn��ɕ��񂾕���<br />
760
768
  <br />
761
769
  �ċ@�a<span dir="ltr">29</span>�̕ґ��́A<br />
@@ -976,6 +984,11 @@ Mors ubi dira fuit vita salusque patent.<br />
976
984
  <br />
977
985
  <h3 class="o-midashi"><a class="midashi_anchor" id="midashi1628"><ruby><rb>�����w</rb><rp>�i</rp><rt>����݂�����</rt><rp>�j</rp></ruby>��</a></h3>
978
986
  <br />
987
+ �@�\�\�\�\�\�\�\�\�\�\<br />
988
+ <br />
989
+ <div class="jisage_8" style="margin-left: 8em"><h3 class="mado-o-midashi"><a class="midashi_anchor" id="midashi1728">��</a></h3>�i<span class="warichu">�n�A�́w�_�ȁi�n���сj�x�B�āA�́w�_�ȁi�ĉΕсj�x�B�V�A�́w�_�ȁi�V���сj�x�̗�</span>�j</div>
990
+ <br />
991
+ <br />
979
992
  ���‹󕶌ɂ����������p�i�ԊO�j<br />
980
993
  <br />
981
994
  ���{���I���<br />
@@ -1018,11 +1031,11 @@ Mors ubi dira fuit vita salusque patent.<br />
1018
1031
  <div class="after_text">
1019
1032
  <hr />
1020
1033
  <br />
1021
- ���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ�𔲂��o�������̂ł��B<br />
1034
+ ���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ�𔲂��o�������̂ƁAXHTML�ϊ������Ŗ��ɂȂ����T���v�����W�߂����̂ł��B<br />
1022
1035
  ���́F�‹󕶌�<br />
1023
1036
  �Z���F�‹󕶌�<br />
1024
1037
  2010�N3��30���쐬<br />
1025
- 2012N4��11���C��<br />
1038
+ 2021N12��15���C��<br />
1026
1039
  �‹󕶌ɍ쐬�t�@�C���F<br />
1027
1040
  ���̃t�@�C���́A�C���^�[�l�b�g�̐}���فA<a href="http://www.aozora.gr.jp/">�‹󕶌Ɂihttp://www.aozora.gr.jp/�j</a>�ō���܂����B���́A�Z���A����ɂ��������̂́A�{�����e�B�A�̊F����ł��B<br />
1028
1041
  <br />
@@ -436,6 +436,10 @@ presqu'
436
436
 
437
437
  �@�\�\�\�\�\�\�\�\�\�\
438
438
 
439
+ �u����ɍ��̂Ƃ��늸���ĕa�I�Ȑ����@���Ɛ���Ȑ����@������ʂ��ׂ��ł���Ƃ��Ă���̂��낤���H����͓|�ꂽ�ƂŐ�������͊w�@���Ɨ����Ă���Ƃ̗͊w�@������ʂ���悤�Ȃ��̂ł���B�v�i�k"Et maintenant oserait-on soutenir qu'il faut distinguer les lois de la vie a` l'e'tat pathologique des lois de la vie a` l'e'tat normal? Ce serait vouloir distinguer les lois de la me'canique dans une maison qui tombe, des lois do la me'canique dans une maison qui tient debout."�l�j
440
+
441
+ �@�\�\�\�\�\�\�\�\�\�\
442
+
439
443
  ���P�_
440
444
 
441
445
  ���Ԃ�_
@@ -766,6 +770,10 @@ Which, teaching us, hath this exordium: Nothing from nothing ever yet was born.
766
770
 
767
771
  ��󕶌Ɂs��������Ԃ񂱁t�m���u��󕶌Ɂv�̍��Ɂu�}�}�v�̒��L�n
768
772
 
773
+ �@�\�\�\�\�\�\�\�\�\�\
774
+
775
+ �N�`�����m�I���b�`���n�A�H��m���u�H��v�Ɂu�~�v�̖T�L�n�f���X���m���u���X���v�ɖT�_�n���g�C�f�C���E�`�j�A�O���O���}���b�e�����g�΁m���u�g�΁v�ɖT�_�n�K�J�P�e�g���f�L�e�A�\���K���l�j�A�^�b�e�A�^�I���e�ƃw�n�R�o���e�L�^�m�B
776
+
769
777
  ���c�g�ݒ��ʼn��ɕ��񂾕���
770
778
 
771
779
  �ċ@�a29�m���u29�v�͏c�����n�̕ґ��́A
@@ -986,6 +994,11 @@ Mors ubi dira fuit vita salusque patent.
986
994
 
987
995
  �����w�s����݂������t�Łm���u�����w�Łv�͑匩�o���n
988
996
 
997
+ �@�\�\�\�\�\�\�\�\�\�\
998
+
999
+ �m���W�������n���m���u���v�͑��匩�o���n�i�m�����蒍�n�n�A�́w�_�ȁi�n���сj�x�B�āA�́w�_�ȁi�ĉΕсj�x�B�V�A�́w�_�ȁi�V���сj�x�̗��m�����蒍�I���n�j
1000
+
1001
+
989
1002
  ���‹󕶌ɂ����������p�i�ԊO�j
990
1003
 
991
1004
  ���{���I���
@@ -1025,10 +1038,10 @@ Mors ubi dira fuit vita salusque patent.
1025
1038
 
1026
1039
 
1027
1040
  �m���{���I���n
1028
- ���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ�𔲂��o�������̂ł��B
1041
+ ���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ�𔲂��o�������̂ƁAXHTML�ϊ������Ŗ��ɂȂ����T���v�����W�߂����̂ł��B
1029
1042
  ���́F�‹󕶌�
1030
1043
  �Z���F�‹󕶌�
1031
1044
  2010�N3��30���쐬
1032
- 2012N4��11���C��
1045
+ 2021N12��15���C��
1033
1046
  �‹󕶌ɍ쐬�t�@�C���F
1034
1047
  ���̃t�@�C���́A�C���^�[�l�b�g�̐}���فA�‹󕶌Ɂihttp://www.aozora.gr.jp/�j�ō���܂����B���́A�Z���A����ɂ��������̂́A�{�����e�B�A�̊F����ł��B