cbeta 2.1.12 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 123f2333d78c24ec1f1371949ea1c1243bae0148
4
- data.tar.gz: c7d16491eedf4a3988811a53d7c22626cdcb6cbe
3
+ metadata.gz: e551ae5c7f351487c70ef2179a71d7b648c30c91
4
+ data.tar.gz: a4b0b03622c5dac61192b7c64b7cb212986fb984
5
5
  SHA512:
6
- metadata.gz: 18e8c1926ea95729142a3662329df3027d8c578c73eac8cc4f23a69eb80eef6b4ca58f3c42763e2025a7a8c28a818e34e2a89334fc4c2f3c31a2b1fdc317b0aa
7
- data.tar.gz: 323df55269c4d67c1421e93e814a34a13611d94f3f0e696a5826eee868d3f8bc057a02af4eb408f64ba4c204d70b2ddea57708b03393b0b1ffccd8ebad730de9
6
+ metadata.gz: 9528054a11f66d4ee4b8661aeeed0587c2bd225e70930d93b921d3ecee62176d753e09976b22fa7b360fe957405ff6fc2939406f51193a443e0f9bfcd9fe936d
7
+ data.tar.gz: c671708b3aa81700dc71257e00e0227f1165fd229d693cba5a3a34db0b212bada93b0e3348b3a45c7d8e581e9c7bc43232abb1acae170c7dc7d7119a24869c6c
@@ -33,36 +33,20 @@ class CBETA::P5aToHTMLForEveryEdition
33
33
 
34
34
  # 將 CBETA XML P5a 轉為 HTML
35
35
  #
36
- # @example for convert 大正藏第一冊:
37
- #
38
- # x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
39
- # x2h.convert('T01')
40
- #
41
36
  # @example for convert 大正藏全部:
42
37
  #
43
38
  # x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
44
39
  # x2h.convert('T')
45
40
  #
46
- # @example for convert 大正藏第五冊至第七冊:
47
- #
48
- # x2h = CBETA::P5aToHTML.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
49
- # x2h.convert('T05..T07')
50
- #
51
41
  # T 是大正藏的 ID, CBETA 的藏經 ID 系統請參考: http://www.cbeta.org/format/id.php
52
42
  def convert(target=nil)
53
43
  return convert_all if target.nil?
54
44
 
55
45
  arg = target.upcase
56
46
  if arg.size == 1
57
- handle_collection(arg)
47
+ convert_canon(arg)
58
48
  else
59
- if arg.include? '..'
60
- arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
61
- handle_vols($1, $2)
62
- }
63
- else
64
- handle_vol(arg)
65
- end
49
+ puts "因為某些典籍單卷跨冊,轉檔必須以某部藏經為單位,例如參數 T 表示轉換整個大正藏。"
66
50
  end
67
51
  end
68
52
 
@@ -89,10 +73,76 @@ class CBETA::P5aToHTMLForEveryEdition
89
73
  def convert_all
90
74
  Dir.entries(@xml_root).sort.each do |c|
91
75
  next unless c.match(/^[A-Z]$/)
92
- handle_collection(c)
76
+ convert_canon(c)
77
+ end
78
+ end
79
+
80
+ def convert_canon(c)
81
+ @series = c
82
+ puts 'convert canon: ' + c
83
+ folder = File.join(@xml_root, @series)
84
+
85
+ @out_folder = File.join(@out_root, @series)
86
+ FileUtils::rm_rf @out_folder
87
+ FileUtils::mkdir_p @out_folder
88
+
89
+ @html_buf = {}
90
+ @back_buf = {}
91
+
92
+ Dir.entries(folder).sort.each do |vol|
93
+ next if vol.start_with? '.'
94
+ convert_vol(vol)
93
95
  end
94
96
  end
95
97
 
98
+ def convert_sutra(xml_fn)
99
+ puts "convert sutra #{xml_fn}"
100
+
101
+ before_parse_xml(xml_fn)
102
+
103
+ text = parse_xml(xml_fn)
104
+
105
+ # 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
106
+ text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
107
+
108
+ juans = text.split(/(<juan \d+>)/)
109
+ open = false
110
+ fo = nil
111
+ juan_no = nil
112
+ fn = ''
113
+ buf = ''
114
+ # 一卷一檔
115
+ juans.each { |j|
116
+ if j =~ /<juan (\d+)>$/
117
+ juan_no = $1.to_i
118
+ elsif juan_no.nil?
119
+ buf = j
120
+ else
121
+ write_juan(juan_no, buf+j)
122
+ buf = ''
123
+ end
124
+ }
125
+ end
126
+
127
+
128
+ def convert_vol(vol)
129
+ puts "convert volumn: #{vol}"
130
+
131
+ @orig = @cbeta.get_canon_symbol(vol[0])
132
+ abort "未處理底本" if @orig.nil?
133
+ @orig_short = @orig.sub(/^【(.*)】$/, '\1')
134
+
135
+ @vol = vol
136
+
137
+ source = File.join(@xml_root, @series, vol)
138
+ Dir.entries(source).sort.each do |f|
139
+ next if f.start_with? '.'
140
+ fn = File.join(source, f)
141
+ convert_sutra(fn)
142
+ end
143
+ end
144
+
145
+
96
146
  def filter_html(html, ed)
97
147
  frag = Nokogiri::HTML.fragment(html)
98
148
  frag.search("r").each do |node|
@@ -163,16 +213,6 @@ class CBETA::P5aToHTMLForEveryEdition
163
213
  to_html(cell)
164
214
  end
165
215
 
166
- def handle_collection(c)
167
- @series = c
168
- puts 'handle_collection ' + c
169
- folder = File.join(@xml_root, @series)
170
- Dir.entries(folder).sort.each do |vol|
171
- next if vol.start_with? '.'
172
- handle_vol(vol)
173
- end
174
- end
175
-
176
216
  def handle_corr(e)
177
217
  r = ''
178
218
  if e.parent.name == 'choice'
@@ -601,35 +641,6 @@ class CBETA::P5aToHTMLForEveryEdition
601
641
  "<r w='#{@orig}' l='#{@lb}'>" + traverse(e) + "</r>"
602
642
  end
603
643
 
604
- def handle_sutra(xml_fn)
605
- puts "convert sutra #{xml_fn}"
606
-
607
- before_parse_xml(xml_fn)
608
-
609
- text = parse_xml(xml_fn)
610
-
611
- # 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
612
- text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
613
-
614
- juans = text.split(/(<juan \d+>)/)
615
- open = false
616
- fo = nil
617
- juan_no = nil
618
- fn = ''
619
- buf = ''
620
- # 一卷一檔
621
- juans.each { |j|
622
- if j =~ /<juan (\d+)>$/
623
- juan_no = $1.to_i
624
- elsif juan_no.nil?
625
- buf = j
626
- else
627
- write_juan(juan_no, buf+j)
628
- buf = ''
629
- end
630
- }
631
- end
632
-
633
644
  def handle_t(e)
634
645
  if e.has_attribute? 'place'
635
646
  return '' if e['place'].include? 'foot'
@@ -685,37 +696,6 @@ class CBETA::P5aToHTMLForEveryEdition
685
696
  def handle_unclear(e)
686
697
  '▆'
687
698
  end
688
-
689
- def handle_vol(vol)
690
- puts "convert volumn: #{vol}"
691
-
692
- @orig = @cbeta.get_canon_symbol(vol[0])
693
- abort "未處理底本" if @orig.nil?
694
- @orig_short = @orig.sub(/^【(.*)】$/, '\1')
695
-
696
- @vol = vol
697
- @series = vol[0]
698
- @out_folder = File.join(@out_root, @series)
699
- FileUtils::mkdir_p @out_folder
700
-
701
- source = File.join(@xml_root, @series, vol)
702
- Dir.entries(source).sort.each do |f|
703
- next if f.start_with? '.'
704
- fn = File.join(source, f)
705
- handle_sutra(fn)
706
- end
707
- end
708
-
709
- def handle_vols(v1, v2)
710
- puts "convert volumns: #{v1}..#{v2}"
711
- @series = v1[0]
712
- folder = File.join(@xml_root, @series)
713
- Dir.foreach(folder) { |vol|
714
- next if vol < v1
715
- next if vol > v2
716
- handle_vol(vol)
717
- }
718
- end
719
699
 
720
700
  def html_back(juan_no, ed)
721
701
  r = ''
@@ -735,11 +715,28 @@ class CBETA::P5aToHTMLForEveryEdition
735
715
  r
736
716
  end
737
717
 
738
- def html_copyright
718
+ def html_copyright(work, juan)
739
719
  r = "<div id='cbeta-copyright'><p>\n"
740
720
 
741
721
  orig = @cbeta.get_canon_nickname(@series)
742
- v = @vol.sub(/^[A-Z]0*([^0].*)$/, '\1')
722
+
723
+ # 處理 卷跨冊
724
+ if work=='L1557'
725
+ @title = '大方廣佛華嚴經疏鈔會本'
726
+ if @vol=='L131' and juan==17
727
+ v = '130-131'
728
+ elsif @vol=='L132' and juan==34
729
+ v = '131-132'
730
+ elsif @vol=='L133' and juan==51
731
+ v = '132-133'
732
+ end
733
+ elsif work=='X0714' and @vol=='X40' and juan==3
734
+ @title = '四分律含注戒本疏行宗記'
735
+ v = '39-40'
736
+ else
737
+ v = @vol.sub(/^[A-Z]0*([^0].*)$/, '\1')
738
+ end
739
+
743
740
  n = @sutra_no.sub(/^[A-Z]\d{2,3}n0*([^0].*)$/, '\1')
744
741
  r += "【經文資訊】#{orig}第 #{v} 冊 No. #{n} #{@title}<br/>\n"
745
742
  r += "【版本記錄】CBETA 電子佛典 版本日期:#{@edition_date}<br/>\n"
@@ -880,36 +877,57 @@ class CBETA::P5aToHTMLForEveryEdition
880
877
  folder = File.join(@out_folder, work, juan)
881
878
  FileUtils.remove_dir(folder, force=true)
882
879
  FileUtils.makedirs folder
880
+
883
881
  @editions.each do |ed|
884
882
  ed_html = filter_html(html, ed)
885
- text = "<div id='body'>#{ed_html}</div>"
886
-
887
883
  back = html_back(juan_no, ed)
888
- copyright = html_copyright
889
-
890
- fn = ed.sub(/^【(.*)】$/, '\1')
891
- if fn != 'CBETA' and fn != @orig_short
892
- fn = @orig_short + '' + fn
884
+
885
+ # 如果是卷跨冊的上半部
886
+ if (work=='L1557' and @vol=='L130' and juan_no==17) or
887
+ (work=='L1557' and @vol=='L131' and juan_no==34) or
888
+ (work=='L1557' and @vol=='L132' and juan_no==51) or
889
+ (work=='X0714' and @vol=='X39' and juan_no==3)
890
+ @html_buf[ed] = ed_html
891
+ @back_buf[ed] = back
892
+ next
893
+ else
894
+ body = ed_html
895
+ unless @html_buf.empty?
896
+ body = @html_buf[ed] + body
897
+ @html_buf.delete ed
898
+ end
899
+ back = @back_buf[ed] + back unless @back_buf.empty?
900
+ copyright = html_copyright(work, juan_no)
901
+ write_juan_ed(folder, ed, body, back, copyright)
902
+
903
+ @back_buf.delete ed
893
904
  end
894
- fn += '.htm'
895
- output_path = File.join(folder, fn)
896
- text = <<eos
905
+ end
906
+ end
907
+
908
+ def write_juan_ed(folder, ed, body, back, copyright)
909
+ fn = ed.sub(/^【(.*)】$/, '\1')
910
+ if fn != 'CBETA' and fn != @orig_short
911
+ fn = @orig_short + '→' + fn
912
+ end
913
+ fn += '.htm'
914
+ output_path = File.join(folder, fn)
915
+ text = <<eos
897
916
  <html>
898
917
  <head>
899
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
900
- <title>#{@title}</title>
918
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
919
+ <title>#{@title}</title>
901
920
  </head>
902
921
  <body>
903
- #{text}
904
- <div id='back'>
905
- #{back}
906
- </div>
907
- #{copyright}
922
+ <div id='body'>#{body}</div>
923
+ <div id='back'>
924
+ #{back}
925
+ </div>
926
+ #{copyright}
908
927
  </body></html>
909
928
  eos
910
- puts "write #{output_path}"
911
- File.write(output_path, text)
912
- end
929
+ puts "write: #{output_path}"
930
+ File.write(output_path, text)
913
931
  end
914
932
 
915
933
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.12
4
+ version: 2.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-31 00:00:00.000000000 Z
11
+ date: 2016-06-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com