cbeta 2.1.12 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cbeta/p5a_to_html_for_every_edition.rb +129 -111
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e551ae5c7f351487c70ef2179a71d7b648c30c91
|
4
|
+
data.tar.gz: a4b0b03622c5dac61192b7c64b7cb212986fb984
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9528054a11f66d4ee4b8661aeeed0587c2bd225e70930d93b921d3ecee62176d753e09976b22fa7b360fe957405ff6fc2939406f51193a443e0f9bfcd9fe936d
|
7
|
+
data.tar.gz: c671708b3aa81700dc71257e00e0227f1165fd229d693cba5a3a34db0b212bada93b0e3348b3a45c7d8e581e9c7bc43232abb1acae170c7dc7d7119a24869c6c
|
@@ -33,36 +33,20 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
33
33
|
|
34
34
|
# 將 CBETA XML P5a 轉為 HTML
|
35
35
|
#
|
36
|
-
# @example for convert 大正藏第一冊:
|
37
|
-
#
|
38
|
-
# x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
|
39
|
-
# x2h.convert('T01')
|
40
|
-
#
|
41
36
|
# @example for convert 大正藏全部:
|
42
37
|
#
|
43
38
|
# x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
|
44
39
|
# x2h.convert('T')
|
45
40
|
#
|
46
|
-
# @example for convert 大正藏第五冊至第七冊:
|
47
|
-
#
|
48
|
-
# x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
|
49
|
-
# x2h.convert('T05..T07')
|
50
|
-
#
|
51
41
|
# T 是大正藏的 ID, CBETA 的藏經 ID 系統請參考: http://www.cbeta.org/format/id.php
|
52
42
|
def convert(target=nil)
|
53
43
|
return convert_all if target.nil?
|
54
44
|
|
55
45
|
arg = target.upcase
|
56
46
|
if arg.size == 1
|
57
|
-
|
47
|
+
convert_canon(arg)
|
58
48
|
else
|
59
|
-
|
60
|
-
arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
|
61
|
-
handle_vols($1, $2)
|
62
|
-
}
|
63
|
-
else
|
64
|
-
handle_vol(arg)
|
65
|
-
end
|
49
|
+
puts "因為某些典籍單卷跨冊,轉檔必須以某部藏經為單位,例如參數 T 表示轉換整個大正藏。"
|
66
50
|
end
|
67
51
|
end
|
68
52
|
|
@@ -89,10 +73,76 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
89
73
|
def convert_all
|
90
74
|
Dir.entries(@xml_root).sort.each do |c|
|
91
75
|
next unless c.match(/^[A-Z]$/)
|
92
|
-
|
76
|
+
convert_canon(c)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def convert_canon(c)
|
81
|
+
@series = c
|
82
|
+
puts 'convert canon: ' + c
|
83
|
+
folder = File.join(@xml_root, @series)
|
84
|
+
|
85
|
+
@out_folder = File.join(@out_root, @series)
|
86
|
+
FileUtils::rm_rf @out_folder
|
87
|
+
FileUtils::mkdir_p @out_folder
|
88
|
+
|
89
|
+
@html_buf = {}
|
90
|
+
@back_buf = {}
|
91
|
+
|
92
|
+
Dir.entries(folder).sort.each do |vol|
|
93
|
+
next if vol.start_with? '.'
|
94
|
+
convert_vol(vol)
|
93
95
|
end
|
94
96
|
end
|
95
97
|
|
98
|
+
def convert_sutra(xml_fn)
|
99
|
+
puts "convert sutra #{xml_fn}"
|
100
|
+
|
101
|
+
before_parse_xml(xml_fn)
|
102
|
+
|
103
|
+
text = parse_xml(xml_fn)
|
104
|
+
|
105
|
+
# 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
|
106
|
+
text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
|
107
|
+
|
108
|
+
juans = text.split(/(<juan \d+>)/)
|
109
|
+
open = false
|
110
|
+
fo = nil
|
111
|
+
juan_no = nil
|
112
|
+
fn = ''
|
113
|
+
buf = ''
|
114
|
+
# 一卷一檔
|
115
|
+
juans.each { |j|
|
116
|
+
if j =~ /<juan (\d+)>$/
|
117
|
+
juan_no = $1.to_i
|
118
|
+
elsif juan_no.nil?
|
119
|
+
buf = j
|
120
|
+
else
|
121
|
+
write_juan(juan_no, buf+j)
|
122
|
+
buf = ''
|
123
|
+
end
|
124
|
+
}
|
125
|
+
end
|
126
|
+
|
127
|
+
|
128
|
+
def convert_vol(vol)
|
129
|
+
puts "convert volumn: #{vol}"
|
130
|
+
|
131
|
+
@orig = @cbeta.get_canon_symbol(vol[0])
|
132
|
+
abort "未處理底本" if @orig.nil?
|
133
|
+
@orig_short = @orig.sub(/^【(.*)】$/, '\1')
|
134
|
+
|
135
|
+
@vol = vol
|
136
|
+
|
137
|
+
source = File.join(@xml_root, @series, vol)
|
138
|
+
Dir.entries(source).sort.each do |f|
|
139
|
+
next if f.start_with? '.'
|
140
|
+
fn = File.join(source, f)
|
141
|
+
convert_sutra(fn)
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
|
96
146
|
def filter_html(html, ed)
|
97
147
|
frag = Nokogiri::HTML.fragment(html)
|
98
148
|
frag.search("r").each do |node|
|
@@ -163,16 +213,6 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
163
213
|
to_html(cell)
|
164
214
|
end
|
165
215
|
|
166
|
-
def handle_collection(c)
|
167
|
-
@series = c
|
168
|
-
puts 'handle_collection ' + c
|
169
|
-
folder = File.join(@xml_root, @series)
|
170
|
-
Dir.entries(folder).sort.each do |vol|
|
171
|
-
next if vol.start_with? '.'
|
172
|
-
handle_vol(vol)
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
176
216
|
def handle_corr(e)
|
177
217
|
r = ''
|
178
218
|
if e.parent.name == 'choice'
|
@@ -601,35 +641,6 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
601
641
|
"<r w='#{@orig}' l='#{@lb}'>" + traverse(e) + "</r>"
|
602
642
|
end
|
603
643
|
|
604
|
-
def handle_sutra(xml_fn)
|
605
|
-
puts "convert sutra #{xml_fn}"
|
606
|
-
|
607
|
-
before_parse_xml(xml_fn)
|
608
|
-
|
609
|
-
text = parse_xml(xml_fn)
|
610
|
-
|
611
|
-
# 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
|
612
|
-
text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
|
613
|
-
|
614
|
-
juans = text.split(/(<juan \d+>)/)
|
615
|
-
open = false
|
616
|
-
fo = nil
|
617
|
-
juan_no = nil
|
618
|
-
fn = ''
|
619
|
-
buf = ''
|
620
|
-
# 一卷一檔
|
621
|
-
juans.each { |j|
|
622
|
-
if j =~ /<juan (\d+)>$/
|
623
|
-
juan_no = $1.to_i
|
624
|
-
elsif juan_no.nil?
|
625
|
-
buf = j
|
626
|
-
else
|
627
|
-
write_juan(juan_no, buf+j)
|
628
|
-
buf = ''
|
629
|
-
end
|
630
|
-
}
|
631
|
-
end
|
632
|
-
|
633
644
|
def handle_t(e)
|
634
645
|
if e.has_attribute? 'place'
|
635
646
|
return '' if e['place'].include? 'foot'
|
@@ -685,37 +696,6 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
685
696
|
def handle_unclear(e)
|
686
697
|
'▆'
|
687
698
|
end
|
688
|
-
|
689
|
-
def handle_vol(vol)
|
690
|
-
puts "convert volumn: #{vol}"
|
691
|
-
|
692
|
-
@orig = @cbeta.get_canon_symbol(vol[0])
|
693
|
-
abort "未處理底本" if @orig.nil?
|
694
|
-
@orig_short = @orig.sub(/^【(.*)】$/, '\1')
|
695
|
-
|
696
|
-
@vol = vol
|
697
|
-
@series = vol[0]
|
698
|
-
@out_folder = File.join(@out_root, @series)
|
699
|
-
FileUtils::mkdir_p @out_folder
|
700
|
-
|
701
|
-
source = File.join(@xml_root, @series, vol)
|
702
|
-
Dir.entries(source).sort.each do |f|
|
703
|
-
next if f.start_with? '.'
|
704
|
-
fn = File.join(source, f)
|
705
|
-
handle_sutra(fn)
|
706
|
-
end
|
707
|
-
end
|
708
|
-
|
709
|
-
def handle_vols(v1, v2)
|
710
|
-
puts "convert volumns: #{v1}..#{v2}"
|
711
|
-
@series = v1[0]
|
712
|
-
folder = File.join(@xml_root, @series)
|
713
|
-
Dir.foreach(folder) { |vol|
|
714
|
-
next if vol < v1
|
715
|
-
next if vol > v2
|
716
|
-
handle_vol(vol)
|
717
|
-
}
|
718
|
-
end
|
719
699
|
|
720
700
|
def html_back(juan_no, ed)
|
721
701
|
r = ''
|
@@ -735,11 +715,28 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
735
715
|
r
|
736
716
|
end
|
737
717
|
|
738
|
-
def html_copyright
|
718
|
+
def html_copyright(work, juan)
|
739
719
|
r = "<div id='cbeta-copyright'><p>\n"
|
740
720
|
|
741
721
|
orig = @cbeta.get_canon_nickname(@series)
|
742
|
-
|
722
|
+
|
723
|
+
# 處理 卷跨冊
|
724
|
+
if work=='L1557'
|
725
|
+
@title = '大方廣佛華嚴經疏鈔會本'
|
726
|
+
if @vol=='L131' and juan==17
|
727
|
+
v = '130-131'
|
728
|
+
elsif @vol=='L132' and juan==34
|
729
|
+
v = '131-132'
|
730
|
+
elsif @vol=='L133' and juan==51
|
731
|
+
v = '132-133'
|
732
|
+
end
|
733
|
+
elsif work=='X0714' and @vol=='X40' and juan==3
|
734
|
+
@title = '四分律含注戒本疏行宗記'
|
735
|
+
v = '39-40'
|
736
|
+
else
|
737
|
+
v = @vol.sub(/^[A-Z]0*([^0].*)$/, '\1')
|
738
|
+
end
|
739
|
+
|
743
740
|
n = @sutra_no.sub(/^[A-Z]\d{2,3}n0*([^0].*)$/, '\1')
|
744
741
|
r += "【經文資訊】#{orig}第 #{v} 冊 No. #{n} #{@title}<br/>\n"
|
745
742
|
r += "【版本記錄】CBETA 電子佛典 版本日期:#{@edition_date}<br/>\n"
|
@@ -880,36 +877,57 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
880
877
|
folder = File.join(@out_folder, work, juan)
|
881
878
|
FileUtils.remove_dir(folder, force=true)
|
882
879
|
FileUtils.makedirs folder
|
880
|
+
|
883
881
|
@editions.each do |ed|
|
884
882
|
ed_html = filter_html(html, ed)
|
885
|
-
text = "<div id='body'>#{ed_html}</div>"
|
886
|
-
|
887
883
|
back = html_back(juan_no, ed)
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
884
|
+
|
885
|
+
# 如果是卷跨冊的上半部
|
886
|
+
if (work=='L1557' and @vol=='L130' and juan_no==17) or
|
887
|
+
(work=='L1557' and @vol=='L131' and juan_no==34) or
|
888
|
+
(work=='L1557' and @vol=='L132' and juan_no==51) or
|
889
|
+
(work=='X0714' and @vol=='X39' and juan_no==3)
|
890
|
+
@html_buf[ed] = ed_html
|
891
|
+
@back_buf[ed] = back
|
892
|
+
next
|
893
|
+
else
|
894
|
+
body = ed_html
|
895
|
+
unless @html_buf.empty?
|
896
|
+
body = @html_buf[ed] + body
|
897
|
+
@html_buf.delete ed
|
898
|
+
end
|
899
|
+
back = @back_buf[ed] + back unless @back_buf.empty?
|
900
|
+
copyright = html_copyright(work, juan_no)
|
901
|
+
write_juan_ed(folder, ed, body, back, copyright)
|
902
|
+
|
903
|
+
@back_buf.delete ed
|
893
904
|
end
|
894
|
-
|
895
|
-
|
896
|
-
|
905
|
+
end
|
906
|
+
end
|
907
|
+
|
908
|
+
def write_juan_ed(folder, ed, body, back, copyright)
|
909
|
+
fn = ed.sub(/^【(.*)】$/, '\1')
|
910
|
+
if fn != 'CBETA' and fn != @orig_short
|
911
|
+
fn = @orig_short + '→' + fn
|
912
|
+
end
|
913
|
+
fn += '.htm'
|
914
|
+
output_path = File.join(folder, fn)
|
915
|
+
text = <<eos
|
897
916
|
<html>
|
898
917
|
<head>
|
899
|
-
|
900
|
-
|
918
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
919
|
+
<title>#{@title}</title>
|
901
920
|
</head>
|
902
921
|
<body>
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
922
|
+
<div id='body'>#{body}</div>
|
923
|
+
<div id='back'>
|
924
|
+
#{back}
|
925
|
+
</div>
|
926
|
+
#{copyright}
|
908
927
|
</body></html>
|
909
928
|
eos
|
910
|
-
|
911
|
-
|
912
|
-
end
|
929
|
+
puts "write: #{output_path}"
|
930
|
+
File.write(output_path, text)
|
913
931
|
end
|
914
932
|
|
915
933
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.12
|
4
|
+
version: 2.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
|
14
14
|
email: zhoubx@gmail.com
|