aozora2html 0.7.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/rubocop.yml +19 -0
  3. data/.github/workflows/ruby.yml +37 -0
  4. data/.gitignore +8 -3
  5. data/.rubocop.yml +111 -0
  6. data/.rubocop_todo.yml +7 -0
  7. data/CHANGELOG.md +59 -0
  8. data/Gemfile +2 -0
  9. data/Guardfile +3 -1
  10. data/HACKING.md +45 -0
  11. data/LICENSE +116 -0
  12. data/README.md +29 -16
  13. data/Rakefile +12 -5
  14. data/aozora2html.gemspec +24 -24
  15. data/bin/aozora2html +25 -71
  16. data/lib/aozora2html/accent_parser.rb +99 -0
  17. data/lib/aozora2html/error.rb +17 -0
  18. data/lib/aozora2html/header.rb +141 -0
  19. data/lib/aozora2html/i18n.rb +48 -0
  20. data/lib/aozora2html/ruby_buffer.rb +116 -0
  21. data/lib/aozora2html/string_refinements.rb +36 -0
  22. data/lib/aozora2html/style_stack.rb +33 -0
  23. data/lib/aozora2html/tag/accent.rb +37 -0
  24. data/lib/aozora2html/tag/block.rb +21 -0
  25. data/lib/aozora2html/tag/chitsuki.rb +19 -0
  26. data/lib/aozora2html/tag/dakuten_katakana.rb +25 -0
  27. data/lib/aozora2html/tag/decorate.rb +19 -0
  28. data/lib/aozora2html/tag/dir.rb +17 -0
  29. data/lib/aozora2html/tag/editor_note.rb +19 -0
  30. data/lib/aozora2html/tag/embed_gaiji.rb +52 -0
  31. data/lib/aozora2html/tag/font_size.rb +20 -0
  32. data/lib/aozora2html/tag/gaiji.rb +12 -0
  33. data/lib/aozora2html/tag/img.rb +21 -0
  34. data/lib/aozora2html/tag/indent.rb +8 -0
  35. data/lib/aozora2html/tag/inline.rb +16 -0
  36. data/lib/aozora2html/tag/inline_caption.rb +17 -0
  37. data/lib/aozora2html/tag/inline_font_size.rb +19 -0
  38. data/lib/aozora2html/tag/inline_keigakomi.rb +17 -0
  39. data/lib/aozora2html/tag/inline_yokogumi.rb +17 -0
  40. data/lib/aozora2html/tag/jisage.rb +17 -0
  41. data/lib/aozora2html/tag/jizume.rb +19 -0
  42. data/lib/aozora2html/tag/kaeriten.rb +17 -0
  43. data/lib/aozora2html/tag/keigakomi.rb +19 -0
  44. data/lib/aozora2html/tag/kunten.rb +12 -0
  45. data/lib/aozora2html/tag/midashi.rb +20 -0
  46. data/lib/aozora2html/tag/multiline.rb +9 -0
  47. data/lib/aozora2html/tag/multiline_caption.rb +13 -0
  48. data/lib/aozora2html/tag/multiline_chitsuki.rb +10 -0
  49. data/lib/aozora2html/tag/multiline_jisage.rb +10 -0
  50. data/lib/aozora2html/tag/multiline_midashi.rb +25 -0
  51. data/lib/aozora2html/tag/multiline_style.rb +19 -0
  52. data/lib/aozora2html/tag/multiline_yokogumi.rb +14 -0
  53. data/lib/aozora2html/tag/okurigana.rb +17 -0
  54. data/lib/aozora2html/tag/oneline_chitsuki.rb +10 -0
  55. data/lib/aozora2html/tag/oneline_indent.rb +9 -0
  56. data/lib/aozora2html/tag/oneline_jisage.rb +10 -0
  57. data/lib/aozora2html/tag/reference_mentioned.rb +47 -0
  58. data/lib/aozora2html/tag/ruby.rb +202 -0
  59. data/lib/aozora2html/tag/un_embed_gaiji.rb +30 -0
  60. data/lib/aozora2html/tag.rb +57 -0
  61. data/lib/aozora2html/tag_parser.rb +60 -0
  62. data/lib/aozora2html/text_buffer.rb +50 -0
  63. data/lib/aozora2html/utils.rb +156 -0
  64. data/lib/aozora2html/version.rb +3 -1
  65. data/lib/aozora2html/yaml_loader.rb +37 -0
  66. data/lib/aozora2html/zip.rb +4 -0
  67. data/lib/aozora2html.rb +1359 -8
  68. data/lib/extensions.rb +12 -0
  69. data/lib/jstream.rb +139 -0
  70. data/sample/chukiichiran_kinyurei.html +15 -2
  71. data/sample/chukiichiran_kinyurei.txt +15 -2
  72. data/test/test_aozora2html.rb +323 -73
  73. data/test/test_aozora_accent_parser.rb +34 -6
  74. data/test/test_command_parse.rb +216 -0
  75. data/test/test_compat.rb +3 -4
  76. data/test/test_dakuten_katakana_tag.rb +12 -13
  77. data/test/test_decorate_tag.rb +11 -7
  78. data/test/test_dir_tag.rb +11 -7
  79. data/test/test_editor_note_tag.rb +9 -6
  80. data/test/test_exception.rb +11 -9
  81. data/test/test_font_size_tag.rb +22 -11
  82. data/test/test_gaiji_tag.rb +22 -14
  83. data/test/test_header.rb +45 -0
  84. data/test/test_helper.rb +3 -1
  85. data/test/test_i18n.rb +39 -0
  86. data/test/test_img_tag.rb +11 -6
  87. data/test/test_inline_caption_tag.rb +11 -7
  88. data/test/test_inline_font_size_tag.rb +15 -11
  89. data/test/test_inline_keigakomi_tag.rb +11 -7
  90. data/test/test_inline_yokogumi_tag.rb +11 -7
  91. data/test/test_jizume_tag.rb +11 -8
  92. data/test/test_jstream.rb +33 -30
  93. data/test/test_kaeriten_tag.rb +11 -7
  94. data/test/test_keigakomi_tag.rb +14 -11
  95. data/test/test_midashi_tag.rb +39 -0
  96. data/test/test_multiline_caption_tag.rb +11 -8
  97. data/test/test_multiline_midashi_tag.rb +26 -26
  98. data/test/test_multiline_style_tag.rb +11 -8
  99. data/test/test_multiline_yokogumi_tag.rb +11 -8
  100. data/test/test_okurigana_tag.rb +11 -7
  101. data/test/test_ruby_parse.rb +130 -0
  102. data/test/test_ruby_tag.rb +11 -7
  103. data/test/test_tag_parser.rb +31 -29
  104. data/vendor/jis2ucs/README.md +3 -6
  105. data/yml/accent_table.yml +240 -0
  106. data/yml/command_table.yml +61 -0
  107. data/yml/jis2ucs.yml +11234 -0
  108. metadata +99 -21
  109. data/.travis.yml +0 -12
  110. data/appveyor.yml +0 -23
  111. data/lib/accent_tag.rb +0 -23
  112. data/lib/aozora2html/jis2ucs.rb +0 -11237
  113. data/lib/embed_gaiji_tag.rb +0 -34
  114. data/lib/t2hs.rb +0 -2535
data/lib/extensions.rb ADDED
@@ -0,0 +1,12 @@
# frozen_string_literal: true

# Kernel extension.
#
# Wraps Kernel#puts so that output which fails with
# Encoding::CompatibilityError is retried with its String arguments
# re-tagged as UTF-8.
module Kernel
  alias original_kernel_puts puts

  # Writes the given objects via the original Kernel#puts.
  #
  # If that raises Encoding::CompatibilityError, the call is retried with
  # every String argument replaced by a UTF-8-tagged copy. Copies are used
  # so that frozen strings do not raise FrozenError and the caller's
  # objects keep their encoding; non-String arguments are passed through
  # unchanged instead of failing on a missing #force_encoding.
  #
  # @param args [Array<Object>] objects to print
  def puts(*args)
    original_kernel_puts(args)
  rescue Encoding::CompatibilityError
    retagged = args.map do |arg|
      arg.is_a?(String) ? arg.dup.force_encoding('utf-8') : arg
    end
    original_kernel_puts(retagged)
  end
end
data/lib/jstream.rb ADDED
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'aozora2html/error'
4
+ require_relative 'aozora2html/i18n'
5
+
##
# Stream class for reading a file.
#
# A thin wrapper around an IO-like object that reads it character by
# character. A CR+LF pair is returned as one unit, and the symbol :eof is
# returned once the end of the input is reached.
# The only accepted line terminator is CR+LF: the constructor exits the
# process when the first line does not end with it, and the read methods
# raise Aozora2Html::Error on a CR that is not followed by LF.
#
class Jstream
  CR = "\r"
  LF = "\n"
  CRLF = (CR + LF).freeze

  # Scans the first line once to check that its line terminator is CR+LF,
  # then rewinds the input.
  # If the terminator is not CR+LF, prints an error message and exits
  # with status 2 (!).
  #
  # TODO: change the behaviour in the future so that it does not go as far
  # as calling exit?
  #
  # @param file_io [IO] object the code calls readline, getc, pos, seek,
  #   rewind and close on
  def initialize(file_io)
    @line = 0
    @current_char = nil
    @file = file_io

    begin
      # chomp! returns nil when the line does not end with CR+LF.
      first_line = @file.readline.chomp!(CRLF)
      raise Aozora2Html::Error, Aozora2Html::I18n.t(:use_crlf) unless first_line
    rescue Aozora2Html::Error => e
      puts e.message(1)
      exit(2)
    ensure
      # Always return to the start, including on the exit path.
      @file.rewind
    end
  end

  def inspect
    "#<jcode-stream input #{@file.inspect}>"
  end

  # Reads one character and returns it.
  #
  # At the end of a line, returns CR+LF (rather than a single character)
  # and increments the line counter.
  # Returns the symbol :eof once EOF is reached.
  #
  # TODO: change this to return nil at EOF?
  #
  # @return [String, Symbol] the character read, CR+LF, or :eof
  # @raise [Aozora2Html::Error] when a CR is not followed by LF
  def read_char
    char = @file.getc

    @current_char =
      if char == CR
        next_char = @file.getc
        raise Aozora2Html::Error, Aozora2Html::I18n.t(:use_crlf) if next_char != LF

        @line += 1
        char + next_char
      elsif char.nil?
        :eof
      else
        char
      end
  end

  # Skips +pos+ characters ahead and returns the character after them,
  # without consuming anything.
  #
  # The file position, the line counter and the current character are all
  # restored to their values from before the call, so #line is unaffected
  # by peeking.
  # At the end of a line, returns CR+LF (rather than a single character).
  # Returns nil (not :eof) when the peeked position is at EOF.
  # The behaviour when peeking beyond the end of a line is undefined.
  #
  # @param pos [Integer] number of characters to skip before peeking
  # @return [String, nil] the peeked character
  # @raise [Aozora2Html::Error] when a CR is not followed by LF
  def peek_char(pos)
    original_pos = @file.pos
    saved_line = @line
    saved_char = @current_char

    begin
      pos.times { read_char }

      char = @file.getc
      if char == CR
        next_char = @file.getc
        raise Aozora2Html::Error, Aozora2Html::I18n.t(:use_crlf) if next_char != LF

        char += next_char
      end
      char
    ensure
      # read_char updates @line and @current_char while skipping ahead;
      # undo that together with the file position.
      @file.seek(original_pos)
      @line = saved_line
      @current_char = saved_char
    end
  end

  # Reads up to the given terminator (a one-character String or CRLF).
  # The terminator is consumed but not included in the result.
  #
  # NOTE(review): when EOF is reached before the terminator, the terminator
  # is printed and the subsequent concat of the :eof Symbol raises
  # TypeError. Left unchanged because callers may rely on it -- confirm.
  #
  # @param [String] endchar terminator
  # @return [String] the characters read before the terminator
  def read_to(endchar)
    buf = +''
    loop do
      char = read_char
      break if char == endchar

      print endchar if char.is_a?(Symbol)
      buf.concat(char)
    end
    buf
  end

  # Reads one line.
  #
  # @return [String] the text read, without the trailing CR+LF
  #
  def read_line
    read_to(CRLF)
  end

  def close
    @file.close
  end

  # Returns the current line number.
  #
  # It is 0 before anything has been read, 1 from the first character up to
  # and including the first \r\n, 2 from the next character up to the next
  # \r\n, and so on.
  def line
    if @file.pos == 0
      0
    elsif @current_char == CRLF
      @line
    else
      @line + 1
    end
  end
end
@@ -426,6 +426,10 @@ presqu'<img src="../../../gaiji/1-09/1-09-68.png" alt="
426
426
  <br />
427
427
  �@�\�\�\�\�\�\�\�\�\�\<br />
428
428
  <br />
429
+ �u����ɍ��̂Ƃ��늸���ĕa�I�Ȑ����@���Ɛ���Ȑ����@������ʂ��ׂ��ł���Ƃ��Ă���̂��낤���H����͓|�ꂽ�ƂŐ�������͊w�@���Ɨ����Ă���Ƃ̗͊w�@������ʂ���悤�Ȃ��̂ł���B�v�i&quot;Et maintenant oserait-on soutenir q<img src="../../../gaiji/1-09/1-09-79.png" alt="��(�A�L���[�g�A�N�Z���g�t��U������)" class="gaiji" />il faut distinguer les lois de la vie <img src="../../../gaiji/1-09/1-09-54.png" alt="��(�O���[�u�A�N�Z���g�t��A������)" class="gaiji" /> l'<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />tat pathologique des lois de la vie <img src="../../../gaiji/1-09/1-09-54.png" alt="��(�O���[�u�A�N�Z���g�t��A������)" class="gaiji" /> l'<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />tat normal? Ce serait vouloir distinguer les lois de la m<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />canique dans une maison qui tombe, des lois do la m<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />canique dans une maison qui tient debout.&quot;�j<br />
430
+ <br />
431
+ �@�\�\�\�\�\�\�\�\�\�\<br />
432
+ <br />
429
433
  ���P�_<br />
430
434
  <br />
431
435
  ���Ԃ�_<br />
@@ -756,6 +760,10 @@ Which, teaching us, hath this exordium: <span class="shatai">Nothing from nothin
756
760
  <br />
757
761
  <ruby><rb>��󕶌�</rb><rp>�i</rp><rt>��������Ԃ�</rt><rp>�j</rp></ruby><span class="notes">�m���u��󕶌Ɂv�̍��Ɂu�}�}�v�̒��L�n</span><br />
758
762
  <br />
763
+ �@�\�\�\�\�\�\�\�\�\�\<br />
764
+ <br />
765
+ �N�`�����m�I���b�`���n�A<ruby><rb>�H��</rb><rp>�i</rp><rt>�~&nbsp;�~</rt><rp>�j</rp></ruby>�f<em class="sesame_dot">���X��</em>���g�C�f�C���E�`�j�A�O���O���}���b�e����<em class="sesame_dot">�g��</em>�K�J�P�e�g���f�L�e�A�\���K���l�j�A�^�b�e�A�^�I���e�ƃw�n�R�o���e�L�^�m�B<br />
766
+ <br />
759
767
  ���c�g�ݒ��ʼn��ɕ��񂾕���<br />
760
768
  <br />
761
769
  �ċ@�a<span dir="ltr">29</span>�̕ґ��́A<br />
@@ -976,6 +984,11 @@ Mors ubi dira fuit vita salusque patent.<br />
976
984
  <br />
977
985
  <h3 class="o-midashi"><a class="midashi_anchor" id="midashi1628"><ruby><rb>�����w</rb><rp>�i</rp><rt>����݂�����</rt><rp>�j</rp></ruby>��</a></h3>
978
986
  <br />
987
+ �@�\�\�\�\�\�\�\�\�\�\<br />
988
+ <br />
989
+ <div class="jisage_8" style="margin-left: 8em"><h3 class="mado-o-midashi"><a class="midashi_anchor" id="midashi1728">��</a></h3>�i<span class="warichu">�n�A�́w�_�ȁi�n���сj�x�B�āA�́w�_�ȁi�ĉΕсj�x�B�V�A�́w�_�ȁi�V���сj�x�̗�</span>�j</div>
990
+ <br />
991
+ <br />
979
992
  ���‹󕶌ɂ����������p�i�ԊO�j<br />
980
993
  <br />
981
994
  ���{���I���<br />
@@ -1018,11 +1031,11 @@ Mors ubi dira fuit vita salusque patent.<br />
1018
1031
  <div class="after_text">
1019
1032
  <hr />
1020
1033
  <br />
1021
- ���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ�𔲂��o�������̂ł��B<br />
1034
+ ���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ�𔲂��o�������̂ƁAXHTML�ϊ������Ŗ��ɂȂ����T���v�����W�߂����̂ł��B<br />
1022
1035
  ���́F�‹󕶌�<br />
1023
1036
  �Z���F�‹󕶌�<br />
1024
1037
  2010�N3��30���쐬<br />
1025
- 2012N4��11���C��<br />
1038
+ 2021N12��15���C��<br />
1026
1039
  �‹󕶌ɍ쐬�t�@�C���F<br />
1027
1040
  ���̃t�@�C���́A�C���^�[�l�b�g�̐}���فA<a href="http://www.aozora.gr.jp/">�‹󕶌Ɂihttp://www.aozora.gr.jp/�j</a>�ō���܂����B���́A�Z���A����ɂ��������̂́A�{�����e�B�A�̊F����ł��B<br />
1028
1041
  <br />
@@ -436,6 +436,10 @@ presqu'
436
436
 
437
437
  �@�\�\�\�\�\�\�\�\�\�\
438
438
 
439
+ �u����ɍ��̂Ƃ��늸���ĕa�I�Ȑ����@���Ɛ���Ȑ����@������ʂ��ׂ��ł���Ƃ��Ă���̂��낤���H����͓|�ꂽ�ƂŐ�������͊w�@���Ɨ����Ă���Ƃ̗͊w�@������ʂ���悤�Ȃ��̂ł���B�v�i�k"Et maintenant oserait-on soutenir qu'il faut distinguer les lois de la vie a` l'e'tat pathologique des lois de la vie a` l'e'tat normal? Ce serait vouloir distinguer les lois de la me'canique dans une maison qui tombe, des lois do la me'canique dans une maison qui tient debout."�l�j
440
+
441
+ �@�\�\�\�\�\�\�\�\�\�\
442
+
439
443
  ���P�_
440
444
 
441
445
  ���Ԃ�_
@@ -766,6 +770,10 @@ Which, teaching us, hath this exordium: Nothing from nothing ever yet was born.
766
770
 
767
771
  ��󕶌Ɂs��������Ԃ񂱁t�m���u��󕶌Ɂv�̍��Ɂu�}�}�v�̒��L�n
768
772
 
773
+ �@�\�\�\�\�\�\�\�\�\�\
774
+
775
+ �N�`�����m�I���b�`���n�A�H��m���u�H��v�Ɂu�~�v�̖T�L�n�f���X���m���u���X���v�ɖT�_�n���g�C�f�C���E�`�j�A�O���O���}���b�e�����g�΁m���u�g�΁v�ɖT�_�n�K�J�P�e�g���f�L�e�A�\���K���l�j�A�^�b�e�A�^�I���e�ƃw�n�R�o���e�L�^�m�B
776
+
769
777
  ���c�g�ݒ��ʼn��ɕ��񂾕���
770
778
 
771
779
  �ċ@�a29�m���u29�v�͏c�����n�̕ґ��́A
@@ -986,6 +994,11 @@ Mors ubi dira fuit vita salusque patent.
986
994
 
987
995
  �����w�s����݂������t�Łm���u�����w�Łv�͑匩�o���n
988
996
 
997
+ �@�\�\�\�\�\�\�\�\�\�\
998
+
999
+ �m���W�������n���m���u���v�͑��匩�o���n�i�m�����蒍�n�n�A�́w�_�ȁi�n���сj�x�B�āA�́w�_�ȁi�ĉΕсj�x�B�V�A�́w�_�ȁi�V���сj�x�̗��m�����蒍�I���n�j
1000
+
1001
+
989
1002
  ���‹󕶌ɂ����������p�i�ԊO�j
990
1003
 
991
1004
  ���{���I���
@@ -1025,10 +1038,10 @@ Mors ubi dira fuit vita salusque patent.
1025
1038
 
1026
1039
 
1027
1040
  �m���{���I���n
1028
- ���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ�𔲂��o�������̂ł��B
1041
+ ���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ�𔲂��o�������̂ƁAXHTML�ϊ������Ŗ��ɂȂ����T���v�����W�߂����̂ł��B
1029
1042
  ���́F�‹󕶌�
1030
1043
  �Z���F�‹󕶌�
1031
1044
  2010�N3��30���쐬
1032
- 2012N4��11���C��
1045
+ 2021N12��15���C��
1033
1046
  �‹󕶌ɍ쐬�t�@�C���F
1034
1047
  ���̃t�@�C���́A�C���^�[�l�b�g�̐}���فA�‹󕶌Ɂihttp://www.aozora.gr.jp/�j�ō���܂����B���́A�Z���A����ɂ��������̂́A�{�����e�B�A�̊F����ł��B