cbeta 2.1.12 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 123f2333d78c24ec1f1371949ea1c1243bae0148
4
- data.tar.gz: c7d16491eedf4a3988811a53d7c22626cdcb6cbe
3
+ metadata.gz: e551ae5c7f351487c70ef2179a71d7b648c30c91
4
+ data.tar.gz: a4b0b03622c5dac61192b7c64b7cb212986fb984
5
5
  SHA512:
6
- metadata.gz: 18e8c1926ea95729142a3662329df3027d8c578c73eac8cc4f23a69eb80eef6b4ca58f3c42763e2025a7a8c28a818e34e2a89334fc4c2f3c31a2b1fdc317b0aa
7
- data.tar.gz: 323df55269c4d67c1421e93e814a34a13611d94f3f0e696a5826eee868d3f8bc057a02af4eb408f64ba4c204d70b2ddea57708b03393b0b1ffccd8ebad730de9
6
+ metadata.gz: 9528054a11f66d4ee4b8661aeeed0587c2bd225e70930d93b921d3ecee62176d753e09976b22fa7b360fe957405ff6fc2939406f51193a443e0f9bfcd9fe936d
7
+ data.tar.gz: c671708b3aa81700dc71257e00e0227f1165fd229d693cba5a3a34db0b212bada93b0e3348b3a45c7d8e581e9c7bc43232abb1acae170c7dc7d7119a24869c6c
@@ -33,36 +33,20 @@ class CBETA::P5aToHTMLForEveryEdition
33
33
 
34
34
  # 將 CBETA XML P5a 轉為 HTML
35
35
  #
36
- # @example for convert 大正藏第一冊:
37
- #
38
- # x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
39
- # x2h.convert('T01')
40
- #
41
36
  # @example for convert 大正藏全部:
42
37
  #
43
38
  # x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
44
39
  # x2h.convert('T')
45
40
  #
46
- # @example for convert 大正藏第五冊至第七冊:
47
- #
48
- # x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
49
- # x2h.convert('T05..T07')
50
- #
51
41
  # T 是大正藏的 ID, CBETA 的藏經 ID 系統請參考: http://www.cbeta.org/format/id.php
52
42
  def convert(target=nil)
53
43
  return convert_all if target.nil?
54
44
 
55
45
  arg = target.upcase
56
46
  if arg.size == 1
57
- handle_collection(arg)
47
+ convert_canon(arg)
58
48
  else
59
- if arg.include? '..'
60
- arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
61
- handle_vols($1, $2)
62
- }
63
- else
64
- handle_vol(arg)
65
- end
49
+ puts "因為某些典籍單卷跨冊,轉檔必須以某部藏經為單位,例如參數 T 表示轉換整個大正藏。"
66
50
  end
67
51
  end
68
52
 
@@ -89,10 +73,76 @@ class CBETA::P5aToHTMLForEveryEdition
89
73
  def convert_all
90
74
  Dir.entries(@xml_root).sort.each do |c|
91
75
  next unless c.match(/^[A-Z]$/)
92
- handle_collection(c)
76
+ convert_canon(c)
77
+ end
78
+ end
79
+
80
+ def convert_canon(c)
81
+ @series = c
82
+ puts 'convert canon: ' + c
83
+ folder = File.join(@xml_root, @series)
84
+
85
+ @out_folder = File.join(@out_root, @series)
86
+ FileUtils::rm_rf @out_folder
87
+ FileUtils::mkdir_p @out_folder
88
+
89
+ @html_buf = {}
90
+ @back_buf = {}
91
+
92
+ Dir.entries(folder).sort.each do |vol|
93
+ next if vol.start_with? '.'
94
+ convert_vol(vol)
93
95
  end
94
96
  end
95
97
 
98
+ def convert_sutra(xml_fn)
99
+ puts "convert sutra #{xml_fn}"
100
+
101
+ before_parse_xml(xml_fn)
102
+
103
+ text = parse_xml(xml_fn)
104
+
105
+ # 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
106
+ text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
107
+
108
+ juans = text.split(/(<juan \d+>)/)
109
+ open = false
110
+ fo = nil
111
+ juan_no = nil
112
+ fn = ''
113
+ buf = ''
114
+ # 一卷一檔
115
+ juans.each { |j|
116
+ if j =~ /<juan (\d+)>$/
117
+ juan_no = $1.to_i
118
+ elsif juan_no.nil?
119
+ buf = j
120
+ else
121
+ write_juan(juan_no, buf+j)
122
+ buf = ''
123
+ end
124
+ }
125
+ end
126
+
127
+
128
+ def convert_vol(vol)
129
+ puts "convert volumn: #{vol}"
130
+
131
+ @orig = @cbeta.get_canon_symbol(vol[0])
132
+ abort "未處理底本" if @orig.nil?
133
+ @orig_short = @orig.sub(/^【(.*)】$/, '\1')
134
+
135
+ @vol = vol
136
+
137
+ source = File.join(@xml_root, @series, vol)
138
+ Dir.entries(source).sort.each do |f|
139
+ next if f.start_with? '.'
140
+ fn = File.join(source, f)
141
+ convert_sutra(fn)
142
+ end
143
+ end
144
+
145
+
96
146
  def filter_html(html, ed)
97
147
  frag = Nokogiri::HTML.fragment(html)
98
148
  frag.search("r").each do |node|
@@ -163,16 +213,6 @@ class CBETA::P5aToHTMLForEveryEdition
163
213
  to_html(cell)
164
214
  end
165
215
 
166
- def handle_collection(c)
167
- @series = c
168
- puts 'handle_collection ' + c
169
- folder = File.join(@xml_root, @series)
170
- Dir.entries(folder).sort.each do |vol|
171
- next if vol.start_with? '.'
172
- handle_vol(vol)
173
- end
174
- end
175
-
176
216
  def handle_corr(e)
177
217
  r = ''
178
218
  if e.parent.name == 'choice'
@@ -601,35 +641,6 @@ class CBETA::P5aToHTMLForEveryEdition
601
641
  "<r w='#{@orig}' l='#{@lb}'>" + traverse(e) + "</r>"
602
642
  end
603
643
 
604
- def handle_sutra(xml_fn)
605
- puts "convert sutra #{xml_fn}"
606
-
607
- before_parse_xml(xml_fn)
608
-
609
- text = parse_xml(xml_fn)
610
-
611
- # 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
612
- text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
613
-
614
- juans = text.split(/(<juan \d+>)/)
615
- open = false
616
- fo = nil
617
- juan_no = nil
618
- fn = ''
619
- buf = ''
620
- # 一卷一檔
621
- juans.each { |j|
622
- if j =~ /<juan (\d+)>$/
623
- juan_no = $1.to_i
624
- elsif juan_no.nil?
625
- buf = j
626
- else
627
- write_juan(juan_no, buf+j)
628
- buf = ''
629
- end
630
- }
631
- end
632
-
633
644
  def handle_t(e)
634
645
  if e.has_attribute? 'place'
635
646
  return '' if e['place'].include? 'foot'
@@ -685,37 +696,6 @@ class CBETA::P5aToHTMLForEveryEdition
685
696
  def handle_unclear(e)
686
697
  '▆'
687
698
  end
688
-
689
- def handle_vol(vol)
690
- puts "convert volumn: #{vol}"
691
-
692
- @orig = @cbeta.get_canon_symbol(vol[0])
693
- abort "未處理底本" if @orig.nil?
694
- @orig_short = @orig.sub(/^【(.*)】$/, '\1')
695
-
696
- @vol = vol
697
- @series = vol[0]
698
- @out_folder = File.join(@out_root, @series)
699
- FileUtils::mkdir_p @out_folder
700
-
701
- source = File.join(@xml_root, @series, vol)
702
- Dir.entries(source).sort.each do |f|
703
- next if f.start_with? '.'
704
- fn = File.join(source, f)
705
- handle_sutra(fn)
706
- end
707
- end
708
-
709
- def handle_vols(v1, v2)
710
- puts "convert volumns: #{v1}..#{v2}"
711
- @series = v1[0]
712
- folder = File.join(@xml_root, @series)
713
- Dir.foreach(folder) { |vol|
714
- next if vol < v1
715
- next if vol > v2
716
- handle_vol(vol)
717
- }
718
- end
719
699
 
720
700
  def html_back(juan_no, ed)
721
701
  r = ''
@@ -735,11 +715,28 @@ class CBETA::P5aToHTMLForEveryEdition
735
715
  r
736
716
  end
737
717
 
738
- def html_copyright
718
+ def html_copyright(work, juan)
739
719
  r = "<div id='cbeta-copyright'><p>\n"
740
720
 
741
721
  orig = @cbeta.get_canon_nickname(@series)
742
- v = @vol.sub(/^[A-Z]0*([^0].*)$/, '\1')
722
+
723
+ # 處理 卷跨冊
724
+ if work=='L1557'
725
+ @title = '大方廣佛華嚴經疏鈔會本'
726
+ if @vol=='L131' and juan==17
727
+ v = '130-131'
728
+ elsif @vol=='L132' and juan==34
729
+ v = '131-132'
730
+ elsif @vol=='L133' and juan==51
731
+ v = '132-133'
732
+ end
733
+ elsif work=='X0714' and @vol=='X40' and juan==3
734
+ @title = '四分律含注戒本疏行宗記'
735
+ v = '39-40'
736
+ else
737
+ v = @vol.sub(/^[A-Z]0*([^0].*)$/, '\1')
738
+ end
739
+
743
740
  n = @sutra_no.sub(/^[A-Z]\d{2,3}n0*([^0].*)$/, '\1')
744
741
  r += "【經文資訊】#{orig}第 #{v} 冊 No. #{n} #{@title}<br/>\n"
745
742
  r += "【版本記錄】CBETA 電子佛典 版本日期:#{@edition_date}<br/>\n"
@@ -880,36 +877,57 @@ class CBETA::P5aToHTMLForEveryEdition
880
877
  folder = File.join(@out_folder, work, juan)
881
878
  FileUtils.remove_dir(folder, force=true)
882
879
  FileUtils.makedirs folder
880
+
883
881
  @editions.each do |ed|
884
882
  ed_html = filter_html(html, ed)
885
- text = "<div id='body'>#{ed_html}</div>"
886
-
887
883
  back = html_back(juan_no, ed)
888
- copyright = html_copyright
889
-
890
- fn = ed.sub(/^【(.*)】$/, '\1')
891
- if fn != 'CBETA' and fn != @orig_short
892
- fn = @orig_short + '' + fn
884
+
885
+ # 如果是卷跨冊的上半部
886
+ if (work=='L1557' and @vol=='L130' and juan_no==17) or
887
+ (work=='L1557' and @vol=='L131' and juan_no==34) or
888
+ (work=='L1557' and @vol=='L132' and juan_no==51) or
889
+ (work=='X0714' and @vol=='X39' and juan_no==3)
890
+ @html_buf[ed] = ed_html
891
+ @back_buf[ed] = back
892
+ next
893
+ else
894
+ body = ed_html
895
+ unless @html_buf.empty?
896
+ body = @html_buf[ed] + body
897
+ @html_buf.delete ed
898
+ end
899
+ back = @back_buf[ed] + back unless @back_buf.empty?
900
+ copyright = html_copyright(work, juan_no)
901
+ write_juan_ed(folder, ed, body, back, copyright)
902
+
903
+ @back_buf.delete ed
893
904
  end
894
- fn += '.htm'
895
- output_path = File.join(folder, fn)
896
- text = <<eos
905
+ end
906
+ end
907
+
908
+ def write_juan_ed(folder, ed, body, back, copyright)
909
+ fn = ed.sub(/^【(.*)】$/, '\1')
910
+ if fn != 'CBETA' and fn != @orig_short
911
+ fn = @orig_short + '→' + fn
912
+ end
913
+ fn += '.htm'
914
+ output_path = File.join(folder, fn)
915
+ text = <<eos
897
916
  <html>
898
917
  <head>
899
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
900
- <title>#{@title}</title>
918
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
919
+ <title>#{@title}</title>
901
920
  </head>
902
921
  <body>
903
- #{text}
904
- <div id='back'>
905
- #{back}
906
- </div>
907
- #{copyright}
922
+ <div id='body'>#{body}</div>
923
+ <div id='back'>
924
+ #{back}
925
+ </div>
926
+ #{copyright}
908
927
  </body></html>
909
928
  eos
910
- puts "write #{output_path}"
911
- File.write(output_path, text)
912
- end
929
+ puts "write: #{output_path}"
930
+ File.write(output_path, text)
913
931
  end
914
932
 
915
933
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.12
4
+ version: 2.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-31 00:00:00.000000000 Z
11
+ date: 2016-06-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com