aozora2html 2.0.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rubocop.yml +19 -0
- data/.github/workflows/ruby.yml +4 -1
- data/.rubocop.yml +36 -152
- data/.rubocop_todo.yml +7 -0
- data/CHANGELOG.md +26 -0
- data/Gemfile +2 -0
- data/Guardfile +3 -1
- data/HACKING.md +45 -0
- data/README.md +14 -6
- data/Rakefile +12 -5
- data/aozora2html.gemspec +24 -22
- data/bin/aozora2html +21 -19
- data/lib/aozora2html/accent_parser.rb +62 -54
- data/lib/aozora2html/error.rb +5 -4
- data/lib/aozora2html/header.rb +20 -18
- data/lib/aozora2html/i18n.rb +40 -20
- data/lib/aozora2html/ruby_buffer.rb +63 -28
- data/lib/aozora2html/string_refinements.rb +36 -0
- data/lib/aozora2html/style_stack.rb +6 -0
- data/lib/aozora2html/tag/accent.rb +10 -12
- data/lib/aozora2html/tag/block.rb +11 -9
- data/lib/aozora2html/tag/chitsuki.rb +6 -2
- data/lib/aozora2html/tag/dakuten_katakana.rb +10 -8
- data/lib/aozora2html/tag/decorate.rb +4 -3
- data/lib/aozora2html/tag/dir.rb +4 -2
- data/lib/aozora2html/tag/editor_note.rb +7 -4
- data/lib/aozora2html/tag/embed_gaiji.rb +15 -11
- data/lib/aozora2html/tag/font_size.rb +5 -2
- data/lib/aozora2html/tag/gaiji.rb +4 -3
- data/lib/aozora2html/tag/img.rb +4 -4
- data/lib/aozora2html/tag/indent.rb +3 -3
- data/lib/aozora2html/tag/inline.rb +10 -7
- data/lib/aozora2html/tag/inline_caption.rb +4 -2
- data/lib/aozora2html/tag/inline_font_size.rb +4 -3
- data/lib/aozora2html/tag/inline_keigakomi.rb +4 -2
- data/lib/aozora2html/tag/inline_yokogumi.rb +4 -3
- data/lib/aozora2html/tag/jisage.rb +3 -1
- data/lib/aozora2html/tag/jizume.rb +3 -0
- data/lib/aozora2html/tag/kaeriten.rb +4 -2
- data/lib/aozora2html/tag/keigakomi.rb +15 -9
- data/lib/aozora2html/tag/kunten.rb +4 -4
- data/lib/aozora2html/tag/midashi.rb +3 -1
- data/lib/aozora2html/tag/multiline.rb +3 -0
- data/lib/aozora2html/tag/multiline_caption.rb +6 -8
- data/lib/aozora2html/tag/multiline_chitsuki.rb +3 -1
- data/lib/aozora2html/tag/multiline_jisage.rb +3 -1
- data/lib/aozora2html/tag/multiline_midashi.rb +6 -3
- data/lib/aozora2html/tag/multiline_style.rb +5 -3
- data/lib/aozora2html/tag/multiline_yokogumi.rb +6 -9
- data/lib/aozora2html/tag/okurigana.rb +4 -2
- data/lib/aozora2html/tag/oneline_chitsuki.rb +3 -2
- data/lib/aozora2html/tag/oneline_indent.rb +8 -1
- data/lib/aozora2html/tag/oneline_jisage.rb +3 -0
- data/lib/aozora2html/tag/reference_mentioned.rb +22 -21
- data/lib/aozora2html/tag/ruby.rb +174 -70
- data/lib/aozora2html/tag/un_embed_gaiji.rb +8 -2
- data/lib/aozora2html/tag.rb +40 -38
- data/lib/aozora2html/tag_parser.rb +23 -16
- data/lib/aozora2html/text_buffer.rb +50 -0
- data/lib/aozora2html/utils.rb +113 -50
- data/lib/aozora2html/version.rb +3 -1
- data/lib/aozora2html/yaml_loader.rb +8 -2
- data/lib/aozora2html/zip.rb +4 -0
- data/lib/aozora2html.rb +1358 -3
- data/lib/extensions.rb +2 -34
- data/lib/jstream.rb +96 -25
- data/sample/chukiichiran_kinyurei.html +15 -2
- data/sample/chukiichiran_kinyurei.txt +15 -2
- data/test/test_aozora2html.rb +137 -148
- data/test/test_aozora_accent_parser.rb +26 -9
- data/test/test_command_parse.rb +25 -22
- data/test/test_compat.rb +3 -4
- data/test/test_dakuten_katakana_tag.rb +10 -12
- data/test/test_decorate_tag.rb +9 -6
- data/test/test_dir_tag.rb +9 -6
- data/test/test_editor_note_tag.rb +8 -5
- data/test/test_exception.rb +10 -8
- data/test/test_font_size_tag.rb +16 -13
- data/test/test_gaiji_tag.rb +15 -14
- data/test/test_header.rb +25 -40
- data/test/test_helper.rb +3 -1
- data/test/test_i18n.rb +22 -6
- data/test/test_img_tag.rb +9 -5
- data/test/test_inline_caption_tag.rb +9 -6
- data/test/test_inline_font_size_tag.rb +13 -10
- data/test/test_inline_keigakomi_tag.rb +9 -6
- data/test/test_inline_yokogumi_tag.rb +9 -6
- data/test/test_jizume_tag.rb +9 -7
- data/test/test_jstream.rb +33 -30
- data/test/test_kaeriten_tag.rb +9 -6
- data/test/test_keigakomi_tag.rb +11 -9
- data/test/test_midashi_tag.rb +15 -14
- data/test/test_multiline_caption_tag.rb +7 -5
- data/test/test_multiline_midashi_tag.rb +24 -25
- data/test/test_multiline_style_tag.rb +9 -7
- data/test/test_multiline_yokogumi_tag.rb +7 -5
- data/test/test_okurigana_tag.rb +9 -6
- data/test/test_ruby_parse.rb +14 -14
- data/test/test_ruby_tag.rb +9 -6
- data/test/test_tag_parser.rb +28 -26
- metadata +60 -14
- data/.travis.yml +0 -12
- data/lib/t2hs.rb +0 -1607
data/lib/extensions.rb
CHANGED
@@ -1,38 +1,6 @@
|
|
1
|
-
#
|
2
|
-
# 1.8 like to_s method to Array
|
3
|
-
class Array
|
4
|
-
def to_s
|
5
|
-
self.join
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
class String
|
10
|
-
|
11
|
-
# used in Aozora2Html#char_type
|
12
|
-
def char_type
|
13
|
-
ch = self
|
14
|
-
if ch.match(Regexp.new("[ぁ-んゝゞ]".encode("shift_jis")))
|
15
|
-
:hiragana
|
16
|
-
elsif ch.match(Regexp.new("[ァ-ンーヽヾヴ]".encode("shift_jis")))
|
17
|
-
:katakana
|
18
|
-
elsif ch.match(Regexp.new("[0-9A-Za-zΑ-Ωα-ωА-Яа-я−&’,.]".encode("shift_jis")))
|
19
|
-
:zenkaku
|
20
|
-
elsif ch.match(Regexp.new("[A-Za-z0-9#\\-\\&'\\,]".encode("shift_jis")))
|
21
|
-
:hankaku
|
22
|
-
elsif ch.match(Regexp.new("[亜-熙々※仝〆〇ヶ]".encode("shift_jis")))
|
23
|
-
:kanji
|
24
|
-
elsif ch.match(/[\.\;\"\?\!\)]/)
|
25
|
-
:hankaku_terminate
|
26
|
-
else
|
27
|
-
:else
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def to_sjis
|
32
|
-
self.encode("shift_jis")
|
33
|
-
end
|
34
|
-
end
|
1
|
+
# frozen_string_literal: true
|
35
2
|
|
3
|
+
# Kernel extension
|
36
4
|
module Kernel
|
37
5
|
alias original_kernel_puts puts
|
38
6
|
|
data/lib/jstream.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'aozora2html/error'
|
4
|
+
require_relative 'aozora2html/i18n'
|
3
5
|
|
4
6
|
##
|
5
7
|
# Stream class for reading a file.
|
@@ -9,60 +11,129 @@ require "aozora2html/i18n"
|
|
9
11
|
# when found line terminator except CR+LF, exit.
|
10
12
|
#
|
11
13
|
class Jstream
|
14
|
+
CR = "\r"
|
15
|
+
LF = "\n"
|
16
|
+
CRLF = CR + LF
|
12
17
|
|
13
|
-
|
14
|
-
|
18
|
+
# 初期化と同時に、いったん最初の行をscanして、改行コードがCR+LFかどうか調べる。
|
19
|
+
# CR+LFでない場合はエラーメッセージを出力してexitする(!)
|
20
|
+
#
|
21
|
+
# TODO: 将来的にはさすがにexitまではしないよう、仕様を変更する?
|
15
22
|
def initialize(file_io)
|
16
23
|
@line = 0
|
17
|
-
@
|
24
|
+
@current_char = nil
|
18
25
|
@file = file_io
|
26
|
+
|
19
27
|
begin
|
20
|
-
|
28
|
+
tmp = @file.readline.chomp!("\r\n")
|
29
|
+
raise Aozora2Html::Error, Aozora2Html::I18n.t(:use_crlf) unless tmp
|
21
30
|
rescue Aozora2Html::Error => e
|
22
31
|
puts e.message(1)
|
23
32
|
if e.is_a?(Aozora2Html::Error)
|
24
33
|
exit(2)
|
25
34
|
end
|
35
|
+
ensure
|
36
|
+
@file.rewind
|
26
37
|
end
|
27
38
|
end
|
28
39
|
|
29
40
|
def inspect
|
30
|
-
"#<jcode-stream input
|
41
|
+
"#<jcode-stream input #{@file.inspect}>"
|
31
42
|
end
|
32
43
|
|
44
|
+
# 1文字読み込んで返す
|
45
|
+
#
|
46
|
+
# 行末の場合は(1文字ではなく)CR+LFを返す
|
47
|
+
# EOFまで到達すると :eof というシンボルを返す
|
48
|
+
#
|
49
|
+
# TODO: EOFの場合はnilを返すように変更する?
|
33
50
|
def read_char
|
34
|
-
|
35
|
-
|
51
|
+
char = @file.getc
|
52
|
+
|
53
|
+
if char == CR
|
54
|
+
char2 = @file.getc
|
55
|
+
if char2 != LF
|
56
|
+
raise Aozora2Html::Error, Aozora2Html::I18n.t(:use_crlf)
|
57
|
+
end
|
58
|
+
|
36
59
|
@line += 1
|
37
|
-
@
|
38
|
-
|
39
|
-
|
40
|
-
|
60
|
+
@current_char = char + char2
|
61
|
+
elsif char.nil?
|
62
|
+
@current_char = :eof
|
63
|
+
else
|
64
|
+
@current_char = char
|
41
65
|
end
|
42
66
|
|
67
|
+
@current_char
|
68
|
+
end
|
69
|
+
|
70
|
+
# pos個分の文字を先読みし、最後の文字を返す
|
71
|
+
#
|
72
|
+
# ファイルディスクリプタは移動しない(実行前の位置まで戻す)
|
73
|
+
# 行末の場合は(1文字ではなく)CR+LFを返す
|
74
|
+
# 行末の先に進んだ場合の挙動は未定義になる
|
75
|
+
def peek_char(pos)
|
76
|
+
original_pos = @file.pos
|
77
|
+
char = nil
|
78
|
+
|
43
79
|
begin
|
44
|
-
|
45
|
-
|
46
|
-
|
80
|
+
pos.times { read_char }
|
81
|
+
|
82
|
+
char = @file.getc
|
83
|
+
if char == CR
|
84
|
+
char2 = @file.getc
|
85
|
+
if char2 != LF
|
86
|
+
raise Aozora2Html::Error, Aozora2Html::I18n.t(:use_crlf)
|
87
|
+
end
|
88
|
+
|
89
|
+
char += char2
|
90
|
+
end
|
91
|
+
ensure
|
92
|
+
@file.seek(original_pos)
|
47
93
|
end
|
48
|
-
|
94
|
+
|
95
|
+
char
|
49
96
|
end
|
50
97
|
|
51
|
-
|
52
|
-
|
98
|
+
# 指定された終端文字(1文字のStringかCRLF)まで読み込む
|
99
|
+
#
|
100
|
+
# @param [String] endchar 終端文字
|
101
|
+
def read_to(endchar)
|
102
|
+
buf = +''
|
103
|
+
loop do
|
104
|
+
char = read_char
|
105
|
+
break if char == endchar
|
106
|
+
|
107
|
+
if char.is_a?(Symbol)
|
108
|
+
print endchar
|
109
|
+
end
|
110
|
+
buf.concat(char)
|
111
|
+
end
|
112
|
+
buf
|
113
|
+
end
|
114
|
+
|
115
|
+
# 1行読み込み
|
116
|
+
#
|
117
|
+
# @return [String] 読み込んだ文字列を返す
|
118
|
+
#
|
119
|
+
def read_line
|
120
|
+
read_to("\r\n")
|
53
121
|
end
|
54
122
|
|
55
123
|
def close
|
56
124
|
@file.close
|
57
125
|
end
|
58
126
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
127
|
+
# 現在の行数を返す
|
128
|
+
#
|
129
|
+
# 何も読み込む前は0、読み込み始めの最初の文字から\r\nまでが1、その次の文字から次の\r\nは2、……といった値になる
|
130
|
+
def line
|
131
|
+
if @file.pos == 0
|
132
|
+
0
|
133
|
+
elsif @current_char == CRLF
|
134
|
+
@line
|
63
135
|
else
|
64
|
-
|
136
|
+
@line + 1
|
65
137
|
end
|
66
|
-
@entry = true
|
67
138
|
end
|
68
139
|
end
|
@@ -426,6 +426,10 @@ presqu'<img src="../../../gaiji/1-09/1-09-68.png" alt="
|
|
426
426
|
<br />
|
427
427
|
�@�\�\�\�\�\�\�\�\�\�\<br />
|
428
428
|
<br />
|
429
|
+
�u����ɍ��̂Ƃ��늸���ĕa�I�Ȑ����@���Ɛ���Ȑ����@������ʂ��ׂ��ł���Ƃ��Ă���̂��낤���H����͓|�ꂽ�ƂŐ�������͊w�@���Ɨ����Ă���Ƃ̗͊w�@������ʂ���悤�Ȃ��̂ł���B�v�i"Et maintenant oserait-on soutenir q<img src="../../../gaiji/1-09/1-09-79.png" alt="��(�A�L���[�g�A�N�Z���g�t��U������)" class="gaiji" />il faut distinguer les lois de la vie <img src="../../../gaiji/1-09/1-09-54.png" alt="��(�O���[�u�A�N�Z���g�t��A������)" class="gaiji" /> l'<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />tat pathologique des lois de la vie <img src="../../../gaiji/1-09/1-09-54.png" alt="��(�O���[�u�A�N�Z���g�t��A������)" class="gaiji" /> l'<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />tat normal? Ce serait vouloir distinguer les lois de la m<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />canique dans une maison qui tombe, des lois do la m<img src="../../../gaiji/1-09/1-09-63.png" alt="��(�A�L���[�g�A�N�Z���g�t��E������)" class="gaiji" />canique dans une maison qui tient debout."�j<br />
|
430
|
+
<br />
|
431
|
+
�@�\�\�\�\�\�\�\�\�\�\<br />
|
432
|
+
<br />
|
429
433
|
���P�_<br />
|
430
434
|
<br />
|
431
435
|
���Ԃ�_<br />
|
@@ -756,6 +760,10 @@ Which, teaching us, hath this exordium: <span class="shatai">Nothing from nothin
|
|
756
760
|
<br />
|
757
761
|
<ruby><rb>���</rb><rp>�i</rp><rt>��������Ԃ�</rt><rp>�j</rp></ruby><span class="notes">�m���u��Ɂv�̍��Ɂu�}�}�v�̒��L�n</span><br />
|
758
762
|
<br />
|
763
|
+
�@�\�\�\�\�\�\�\�\�\�\<br />
|
764
|
+
<br />
|
765
|
+
�N�`�����m�I���b�`���n�A<ruby><rb>�H��</rb><rp>�i</rp><rt>�~ �~</rt><rp>�j</rp></ruby>�f<em class="sesame_dot">���X��</em>���g�C�f�C���E�`�j�A�O���O���}���b�e����<em class="sesame_dot">�g��</em>�K�J�P�e�g���f�L�e�A�\���K���l�j�A�^�b�e�A�^�I���e�ƃw�n�R�o���e�L�^�m�B<br />
|
766
|
+
<br />
|
759
767
|
���c�g�ݒ��ʼn��ɕ�����<br />
|
760
768
|
<br />
|
761
769
|
�ċ@�a<span dir="ltr">29</span>�̕ґ��́A<br />
|
@@ -976,6 +984,11 @@ Mors ubi dira fuit vita salusque patent.<br />
|
|
976
984
|
<br />
|
977
985
|
<h3 class="o-midashi"><a class="midashi_anchor" id="midashi1628"><ruby><rb>�����w</rb><rp>�i</rp><rt>����݂�����</rt><rp>�j</rp></ruby>��</a></h3>
|
978
986
|
<br />
|
987
|
+
�@�\�\�\�\�\�\�\�\�\�\<br />
|
988
|
+
<br />
|
989
|
+
<div class="jisage_8" style="margin-left: 8em"><h3 class="mado-o-midashi"><a class="midashi_anchor" id="midashi1728">��</a></h3>�i<span class="warichu">�n�A�́w�_�ȁi�n���сj�x�B�āA�́w�_�ȁi�ĉΕсj�x�B�V�A�́w�_�ȁi�V���сj�x�̗�</span>�j</div>
|
990
|
+
<br />
|
991
|
+
<br />
|
979
992
|
���ɂ����������p�i�ԊO�j<br />
|
980
993
|
<br />
|
981
994
|
���{���I���<br />
|
@@ -1018,11 +1031,11 @@ Mors ubi dira fuit vita salusque patent.<br />
|
|
1018
1031
|
<div class="after_text">
|
1019
1032
|
<hr />
|
1020
1033
|
<br />
|
1021
|
-
���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ���o
|
1034
|
+
���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ���o�������̂ƁAXHTML�ϊ������Ŗ��ɂȂ����T���v�����W�߂����̂ł��B<br />
|
1022
1035
|
���́F��<br />
|
1023
1036
|
�Z���F��<br />
|
1024
1037
|
2010�N3��30���쐬<br />
|
1025
|
-
|
1038
|
+
2021�N12��15���C��<br />
|
1026
1039
|
�ɍ쐬�t�@�C���F<br />
|
1027
1040
|
���̃t�@�C���́A�C���^�[�l�b�g�̐}���فA<a href="http://www.aozora.gr.jp/">�Ɂihttp://www.aozora.gr.jp/�j</a>�ō���܂����B���́A�Z���A����ɂ��������̂́A�{�����e�B�A�̊F����ł��B<br />
|
1028
1041
|
<br />
|
@@ -436,6 +436,10 @@ presqu'
|
|
436
436
|
|
437
437
|
�@�\�\�\�\�\�\�\�\�\�\
|
438
438
|
|
439
|
+
�u����ɍ��̂Ƃ��늸���ĕa�I�Ȑ����@���Ɛ���Ȑ����@������ʂ��ׂ��ł���Ƃ��Ă���̂��낤���H����͓|�ꂽ�ƂŐ�������͊w�@���Ɨ����Ă���Ƃ̗͊w�@������ʂ���悤�Ȃ��̂ł���B�v�i�k"Et maintenant oserait-on soutenir qu'il faut distinguer les lois de la vie a` l'e'tat pathologique des lois de la vie a` l'e'tat normal? Ce serait vouloir distinguer les lois de la me'canique dans une maison qui tombe, des lois do la me'canique dans une maison qui tient debout."�l�j
|
440
|
+
|
441
|
+
�@�\�\�\�\�\�\�\�\�\�\
|
442
|
+
|
439
443
|
���P�_
|
440
444
|
|
441
445
|
���Ԃ�_
|
@@ -766,6 +770,10 @@ Which, teaching us, hath this exordium: Nothing from nothing ever yet was born.
|
|
766
770
|
|
767
771
|
��Ɂs��������Ԃt�m���u��Ɂv�̍��Ɂu�}�}�v�̒��L�n
|
768
772
|
|
773
|
+
�@�\�\�\�\�\�\�\�\�\�\
|
774
|
+
|
775
|
+
�N�`�����m�I���b�`���n�A�H��m���u�H��v�Ɂu�~�v�̖T�L�n�f���X���m���u���X���v�ɖT�_�n���g�C�f�C���E�`�j�A�O���O���}���b�e�����g�m���u�g�v�ɖT�_�n�K�J�P�e�g���f�L�e�A�\���K���l�j�A�^�b�e�A�^�I���e�ƃw�n�R�o���e�L�^�m�B
|
776
|
+
|
769
777
|
���c�g�ݒ��ʼn��ɕ�����
|
770
778
|
|
771
779
|
�ċ@�a29�m���u29�v�͏c�����n�̕ґ��́A
|
@@ -986,6 +994,11 @@ Mors ubi dira fuit vita salusque patent.
|
|
986
994
|
|
987
995
|
�����w�s����݂������t�Łm���u�����w�Łv�͑匩�o���n
|
988
996
|
|
997
|
+
�@�\�\�\�\�\�\�\�\�\�\
|
998
|
+
|
999
|
+
�m���W�������n���m���u���v�͑��匩�o���n�i�m�����蒍�n�n�A�́w�_�ȁi�n���сj�x�B�āA�́w�_�ȁi�ĉΕсj�x�B�V�A�́w�_�ȁi�V���сj�x�̗��m�����蒍�I���n�j
|
1000
|
+
|
1001
|
+
|
989
1002
|
���ɂ����������p�i�ԊO�j
|
990
1003
|
|
991
1004
|
���{���I���
|
@@ -1025,10 +1038,10 @@ Mors ubi dira fuit vita salusque patent.
|
|
1025
1038
|
|
1026
1039
|
|
1027
1040
|
�m���{���I���n
|
1028
|
-
���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ���o
|
1041
|
+
���̃e�L�X�g�́A�u���L�ꗗ�v�̋L�ڗ���o�������̂ƁAXHTML�ϊ������Ŗ��ɂȂ����T���v�����W�߂����̂ł��B
|
1029
1042
|
���́F��
|
1030
1043
|
�Z���F��
|
1031
1044
|
2010�N3��30���쐬
|
1032
|
-
|
1045
|
+
2021�N12��15���C��
|
1033
1046
|
�ɍ쐬�t�@�C���F
|
1034
1047
|
���̃t�@�C���́A�C���^�[�l�b�g�̐}���فA�Ɂihttp://www.aozora.gr.jp/�j�ō���܂����B���́A�Z���A����ɂ��������̂́A�{�����e�B�A�̊F����ł��B
|