cbeta 2.1.12 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cbeta/p5a_to_html_for_every_edition.rb +129 -111
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e551ae5c7f351487c70ef2179a71d7b648c30c91
|
4
|
+
data.tar.gz: a4b0b03622c5dac61192b7c64b7cb212986fb984
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9528054a11f66d4ee4b8661aeeed0587c2bd225e70930d93b921d3ecee62176d753e09976b22fa7b360fe957405ff6fc2939406f51193a443e0f9bfcd9fe936d
|
7
|
+
data.tar.gz: c671708b3aa81700dc71257e00e0227f1165fd229d693cba5a3a34db0b212bada93b0e3348b3a45c7d8e581e9c7bc43232abb1acae170c7dc7d7119a24869c6c
|
@@ -33,36 +33,20 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
33
33
|
|
34
34
|
# 將 CBETA XML P5a 轉為 HTML
|
35
35
|
#
|
36
|
-
# @example for convert 大正藏第一冊:
|
37
|
-
#
|
38
|
-
# x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
|
39
|
-
# x2h.convert('T01')
|
40
|
-
#
|
41
36
|
# @example for convert 大正藏全部:
|
42
37
|
#
|
43
38
|
# x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
|
44
39
|
# x2h.convert('T')
|
45
40
|
#
|
46
|
-
# @example for convert 大正藏第五冊至第七冊:
|
47
|
-
#
|
48
|
-
# x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
|
49
|
-
# x2h.convert('T05..T07')
|
50
|
-
#
|
51
41
|
# T 是大正藏的 ID, CBETA 的藏經 ID 系統請參考: http://www.cbeta.org/format/id.php
|
52
42
|
def convert(target=nil)
|
53
43
|
return convert_all if target.nil?
|
54
44
|
|
55
45
|
arg = target.upcase
|
56
46
|
if arg.size == 1
|
57
|
-
|
47
|
+
convert_canon(arg)
|
58
48
|
else
|
59
|
-
|
60
|
-
arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
|
61
|
-
handle_vols($1, $2)
|
62
|
-
}
|
63
|
-
else
|
64
|
-
handle_vol(arg)
|
65
|
-
end
|
49
|
+
puts "因為某些典籍單卷跨冊,轉檔必須以某部藏經為單位,例如參數 T 表示轉換整個大正藏。"
|
66
50
|
end
|
67
51
|
end
|
68
52
|
|
@@ -89,10 +73,76 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
89
73
|
def convert_all
|
90
74
|
Dir.entries(@xml_root).sort.each do |c|
|
91
75
|
next unless c.match(/^[A-Z]$/)
|
92
|
-
|
76
|
+
convert_canon(c)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def convert_canon(c)
|
81
|
+
@series = c
|
82
|
+
puts 'convert canon: ' + c
|
83
|
+
folder = File.join(@xml_root, @series)
|
84
|
+
|
85
|
+
@out_folder = File.join(@out_root, @series)
|
86
|
+
FileUtils::rm_rf @out_folder
|
87
|
+
FileUtils::mkdir_p @out_folder
|
88
|
+
|
89
|
+
@html_buf = {}
|
90
|
+
@back_buf = {}
|
91
|
+
|
92
|
+
Dir.entries(folder).sort.each do |vol|
|
93
|
+
next if vol.start_with? '.'
|
94
|
+
convert_vol(vol)
|
93
95
|
end
|
94
96
|
end
|
95
97
|
|
98
|
+
def convert_sutra(xml_fn)
|
99
|
+
puts "convert sutra #{xml_fn}"
|
100
|
+
|
101
|
+
before_parse_xml(xml_fn)
|
102
|
+
|
103
|
+
text = parse_xml(xml_fn)
|
104
|
+
|
105
|
+
# 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
|
106
|
+
text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
|
107
|
+
|
108
|
+
juans = text.split(/(<juan \d+>)/)
|
109
|
+
open = false
|
110
|
+
fo = nil
|
111
|
+
juan_no = nil
|
112
|
+
fn = ''
|
113
|
+
buf = ''
|
114
|
+
# 一卷一檔
|
115
|
+
juans.each { |j|
|
116
|
+
if j =~ /<juan (\d+)>$/
|
117
|
+
juan_no = $1.to_i
|
118
|
+
elsif juan_no.nil?
|
119
|
+
buf = j
|
120
|
+
else
|
121
|
+
write_juan(juan_no, buf+j)
|
122
|
+
buf = ''
|
123
|
+
end
|
124
|
+
}
|
125
|
+
end
|
126
|
+
|
127
|
+
|
128
|
+
def convert_vol(vol)
|
129
|
+
puts "convert volumn: #{vol}"
|
130
|
+
|
131
|
+
@orig = @cbeta.get_canon_symbol(vol[0])
|
132
|
+
abort "未處理底本" if @orig.nil?
|
133
|
+
@orig_short = @orig.sub(/^【(.*)】$/, '\1')
|
134
|
+
|
135
|
+
@vol = vol
|
136
|
+
|
137
|
+
source = File.join(@xml_root, @series, vol)
|
138
|
+
Dir.entries(source).sort.each do |f|
|
139
|
+
next if f.start_with? '.'
|
140
|
+
fn = File.join(source, f)
|
141
|
+
convert_sutra(fn)
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
|
96
146
|
def filter_html(html, ed)
|
97
147
|
frag = Nokogiri::HTML.fragment(html)
|
98
148
|
frag.search("r").each do |node|
|
@@ -163,16 +213,6 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
163
213
|
to_html(cell)
|
164
214
|
end
|
165
215
|
|
166
|
-
def handle_collection(c)
|
167
|
-
@series = c
|
168
|
-
puts 'handle_collection ' + c
|
169
|
-
folder = File.join(@xml_root, @series)
|
170
|
-
Dir.entries(folder).sort.each do |vol|
|
171
|
-
next if vol.start_with? '.'
|
172
|
-
handle_vol(vol)
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
176
216
|
def handle_corr(e)
|
177
217
|
r = ''
|
178
218
|
if e.parent.name == 'choice'
|
@@ -601,35 +641,6 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
601
641
|
"<r w='#{@orig}' l='#{@lb}'>" + traverse(e) + "</r>"
|
602
642
|
end
|
603
643
|
|
604
|
-
def handle_sutra(xml_fn)
|
605
|
-
puts "convert sutra #{xml_fn}"
|
606
|
-
|
607
|
-
before_parse_xml(xml_fn)
|
608
|
-
|
609
|
-
text = parse_xml(xml_fn)
|
610
|
-
|
611
|
-
# 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
|
612
|
-
text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
|
613
|
-
|
614
|
-
juans = text.split(/(<juan \d+>)/)
|
615
|
-
open = false
|
616
|
-
fo = nil
|
617
|
-
juan_no = nil
|
618
|
-
fn = ''
|
619
|
-
buf = ''
|
620
|
-
# 一卷一檔
|
621
|
-
juans.each { |j|
|
622
|
-
if j =~ /<juan (\d+)>$/
|
623
|
-
juan_no = $1.to_i
|
624
|
-
elsif juan_no.nil?
|
625
|
-
buf = j
|
626
|
-
else
|
627
|
-
write_juan(juan_no, buf+j)
|
628
|
-
buf = ''
|
629
|
-
end
|
630
|
-
}
|
631
|
-
end
|
632
|
-
|
633
644
|
def handle_t(e)
|
634
645
|
if e.has_attribute? 'place'
|
635
646
|
return '' if e['place'].include? 'foot'
|
@@ -685,37 +696,6 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
685
696
|
def handle_unclear(e)
|
686
697
|
'▆'
|
687
698
|
end
|
688
|
-
|
689
|
-
def handle_vol(vol)
|
690
|
-
puts "convert volumn: #{vol}"
|
691
|
-
|
692
|
-
@orig = @cbeta.get_canon_symbol(vol[0])
|
693
|
-
abort "未處理底本" if @orig.nil?
|
694
|
-
@orig_short = @orig.sub(/^【(.*)】$/, '\1')
|
695
|
-
|
696
|
-
@vol = vol
|
697
|
-
@series = vol[0]
|
698
|
-
@out_folder = File.join(@out_root, @series)
|
699
|
-
FileUtils::mkdir_p @out_folder
|
700
|
-
|
701
|
-
source = File.join(@xml_root, @series, vol)
|
702
|
-
Dir.entries(source).sort.each do |f|
|
703
|
-
next if f.start_with? '.'
|
704
|
-
fn = File.join(source, f)
|
705
|
-
handle_sutra(fn)
|
706
|
-
end
|
707
|
-
end
|
708
|
-
|
709
|
-
def handle_vols(v1, v2)
|
710
|
-
puts "convert volumns: #{v1}..#{v2}"
|
711
|
-
@series = v1[0]
|
712
|
-
folder = File.join(@xml_root, @series)
|
713
|
-
Dir.foreach(folder) { |vol|
|
714
|
-
next if vol < v1
|
715
|
-
next if vol > v2
|
716
|
-
handle_vol(vol)
|
717
|
-
}
|
718
|
-
end
|
719
699
|
|
720
700
|
def html_back(juan_no, ed)
|
721
701
|
r = ''
|
@@ -735,11 +715,28 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
735
715
|
r
|
736
716
|
end
|
737
717
|
|
738
|
-
def html_copyright
|
718
|
+
def html_copyright(work, juan)
|
739
719
|
r = "<div id='cbeta-copyright'><p>\n"
|
740
720
|
|
741
721
|
orig = @cbeta.get_canon_nickname(@series)
|
742
|
-
|
722
|
+
|
723
|
+
# 處理 卷跨冊
|
724
|
+
if work=='L1557'
|
725
|
+
@title = '大方廣佛華嚴經疏鈔會本'
|
726
|
+
if @vol=='L131' and juan==17
|
727
|
+
v = '130-131'
|
728
|
+
elsif @vol=='L132' and juan==34
|
729
|
+
v = '131-132'
|
730
|
+
elsif @vol=='L133' and juan==51
|
731
|
+
v = '132-133'
|
732
|
+
end
|
733
|
+
elsif work=='X0714' and @vol=='X40' and juan==3
|
734
|
+
@title = '四分律含注戒本疏行宗記'
|
735
|
+
v = '39-40'
|
736
|
+
else
|
737
|
+
v = @vol.sub(/^[A-Z]0*([^0].*)$/, '\1')
|
738
|
+
end
|
739
|
+
|
743
740
|
n = @sutra_no.sub(/^[A-Z]\d{2,3}n0*([^0].*)$/, '\1')
|
744
741
|
r += "【經文資訊】#{orig}第 #{v} 冊 No. #{n} #{@title}<br/>\n"
|
745
742
|
r += "【版本記錄】CBETA 電子佛典 版本日期:#{@edition_date}<br/>\n"
|
@@ -880,36 +877,57 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
880
877
|
folder = File.join(@out_folder, work, juan)
|
881
878
|
FileUtils.remove_dir(folder, force=true)
|
882
879
|
FileUtils.makedirs folder
|
880
|
+
|
883
881
|
@editions.each do |ed|
|
884
882
|
ed_html = filter_html(html, ed)
|
885
|
-
text = "<div id='body'>#{ed_html}</div>"
|
886
|
-
|
887
883
|
back = html_back(juan_no, ed)
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
884
|
+
|
885
|
+
# 如果是卷跨冊的上半部
|
886
|
+
if (work=='L1557' and @vol=='L130' and juan_no==17) or
|
887
|
+
(work=='L1557' and @vol=='L131' and juan_no==34) or
|
888
|
+
(work=='L1557' and @vol=='L132' and juan_no==51) or
|
889
|
+
(work=='X0714' and @vol=='X39' and juan_no==3)
|
890
|
+
@html_buf[ed] = ed_html
|
891
|
+
@back_buf[ed] = back
|
892
|
+
next
|
893
|
+
else
|
894
|
+
body = ed_html
|
895
|
+
unless @html_buf.empty?
|
896
|
+
body = @html_buf[ed] + body
|
897
|
+
@html_buf.delete ed
|
898
|
+
end
|
899
|
+
back = @back_buf[ed] + back unless @back_buf.empty?
|
900
|
+
copyright = html_copyright(work, juan_no)
|
901
|
+
write_juan_ed(folder, ed, body, back, copyright)
|
902
|
+
|
903
|
+
@back_buf.delete ed
|
893
904
|
end
|
894
|
-
|
895
|
-
|
896
|
-
|
905
|
+
end
|
906
|
+
end
|
907
|
+
|
908
|
+
def write_juan_ed(folder, ed, body, back, copyright)
|
909
|
+
fn = ed.sub(/^【(.*)】$/, '\1')
|
910
|
+
if fn != 'CBETA' and fn != @orig_short
|
911
|
+
fn = @orig_short + '→' + fn
|
912
|
+
end
|
913
|
+
fn += '.htm'
|
914
|
+
output_path = File.join(folder, fn)
|
915
|
+
text = <<eos
|
897
916
|
<html>
|
898
917
|
<head>
|
899
|
-
|
900
|
-
|
918
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
919
|
+
<title>#{@title}</title>
|
901
920
|
</head>
|
902
921
|
<body>
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
922
|
+
<div id='body'>#{body}</div>
|
923
|
+
<div id='back'>
|
924
|
+
#{back}
|
925
|
+
</div>
|
926
|
+
#{copyright}
|
908
927
|
</body></html>
|
909
928
|
eos
|
910
|
-
|
911
|
-
|
912
|
-
end
|
929
|
+
puts "write: #{output_path}"
|
930
|
+
File.write(output_path, text)
|
913
931
|
end
|
914
932
|
|
915
933
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
|
14
14
|
email: zhoubx@gmail.com
|