cbeta 2.2.22 → 2.2.26

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c051fafb22469b41ce99482cb6c7e42c2b57a934
4
- data.tar.gz: b844d9fd876dcda4236f3c0d74b21baa234fec5c
3
+ metadata.gz: d19eb4a198b323a6ee4601eee34ec46d56d41c50
4
+ data.tar.gz: 359ce18558763b5b65ac05682c4619d5fe106b7a
5
5
  SHA512:
6
- metadata.gz: 74c281e563811da223290c5cd35fc5246bae9f25267d634c553010ed3e9e7a9d7451d745b54408dbda05bec0218e1ac553fcb447b34c7ab37dd6bdbe806782cc
7
- data.tar.gz: 25a7190bde65648a11d5ea65f476b247a406c50736df9f66c1f9b90c12f7f14e3af7581023e4a397bd2fa4270134f9c9e66df257abed8bda7f752af8b8fdcaf7
6
+ metadata.gz: b25957a15c65e3e49cf6ef6f33a6e8bbec950cac63ebbb4a164286c55ce16080b4824d4b033773d5407d0b86fe9c0bc398eb5685ac7a09f9ee48c7d936b81cf4
7
+ data.tar.gz: 4af74f6871288f1bc15a8b858ccc03fd688cec5a6621578e9a38764c1f25f8dd3051f6088bd095e546d05f96ea336368b25aa2656384d06674a2e85036c08562
@@ -32,6 +32,16 @@ class CBETA
32
32
  vol.sub(/^(#{CANON}).*$/, '\1')
33
33
  end
34
34
 
35
+ # @param file_basename[String] XML檔主檔名, 例如 "T01n0001" 或 "T25n1510a"
36
+ # @param lb[String] 例如 "0001a01" 或 "0757b29"
37
+ # @return [String] CBETA 行首資訊,例如 "T01n0001_p0001a01" 或 "T25n1510ap0757b29"
38
+ def self.get_linehead(file_basename, lb)
39
+ r = file_basename
40
+ r += '_' if file_basename.match(/\d$/)
41
+ r += 'p' + lb
42
+ r
43
+ end
44
+
35
45
  # 由 冊號 及 典籍編號 取得 XML 主檔名
36
46
  # @param vol[String] 冊號, 例如 "T01" 或 "GA009"
37
47
  # @param work[String] 典籍編號, 例如 "T0001" 或 "GA0008"
@@ -307,7 +307,7 @@ class CBETA::P5aToHTML
307
307
 
308
308
  @char_count = 1
309
309
  @lb = e['n']
310
- line_head = @sutra_no + '_p' + @lb
310
+ line_head = CBETA.get_linehead(@sutra_no, @lb)
311
311
 
312
312
  r = ''
313
313
  #if e.parent.name == 'lg' and $lg_row_open
@@ -72,7 +72,7 @@ class CBETA::P5aToHTMLForEveryEdition
72
72
 
73
73
  def convert_all
74
74
  Dir.entries(@xml_root).sort.each do |c|
75
- next unless c.match(/^#{CBETA.CANON}$/)
75
+ next unless c.match(/^#{CBETA::CANON}$/)
76
76
  convert_canon(c)
77
77
  end
78
78
  end
@@ -139,29 +139,7 @@ class CBETA::P5aToHTMLForEveryEdition
139
139
  end
140
140
  end
141
141
 
142
-
143
- def filter_html(html, ed)
144
- frag = Nokogiri::HTML.fragment(html)
145
- frag.search("r").each do |node|
146
- if node['w'].include? ed
147
- html_only_this_edition = filter_html(node.inner_html, ed)
148
- node.add_previous_sibling html_only_this_edition
149
- end
150
- node.remove
151
- end
152
- frag.to_html
153
- end
154
-
155
- def get_editions(doc)
156
- r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
157
- doc.xpath('//lem|//rdg').each do |e|
158
- w = e['wit'].scan(/【.*?】/)
159
- r.merge w
160
- end
161
- r
162
- end
163
-
164
- def handle_anchor(e)
142
+ def e_anchor(e)
165
143
  id = e['id']
166
144
  if e.has_attribute?('id')
167
145
  if id.start_with?('nkr_note_orig')
@@ -184,7 +162,7 @@ class CBETA::P5aToHTMLForEveryEdition
184
162
  ''
185
163
  end
186
164
 
187
- def handle_app(e)
165
+ def e_app(e)
188
166
  r = ''
189
167
  if e['type'] == 'star'
190
168
  c = e['corresp'][1..-1]
@@ -193,14 +171,14 @@ class CBETA::P5aToHTMLForEveryEdition
193
171
  r + traverse(e)
194
172
  end
195
173
 
196
- def handle_byline(e)
174
+ def e_byline(e)
197
175
  r = '<p class="byline">'
198
176
  r += line_info
199
177
  r += traverse(e)
200
178
  r + '</p>'
201
179
  end
202
180
 
203
- def handle_cell(e)
181
+ def e_cell(e)
204
182
  doc = Nokogiri::XML::Document.new
205
183
  cell = doc.create_element('div')
206
184
  cell['class'] = 'bip-table-cell'
@@ -210,7 +188,7 @@ class CBETA::P5aToHTMLForEveryEdition
210
188
  to_html(cell)
211
189
  end
212
190
 
213
- def handle_corr(e)
191
+ def e_corr(e)
214
192
  r = ''
215
193
  if e.parent.name == 'choice'
216
194
  sic = e.parent.at_xpath('sic')
@@ -231,7 +209,7 @@ class CBETA::P5aToHTMLForEveryEdition
231
209
  r + "<r w='【CBETA】' l='#{@lb}'><span class='cbeta'>%s</span></r>" % traverse(e)
232
210
  end
233
211
 
234
- def handle_div(e)
212
+ def e_div(e)
235
213
  @div_count += 1
236
214
  n = @div_count
237
215
  if e.has_attribute? 'type'
@@ -244,11 +222,11 @@ class CBETA::P5aToHTMLForEveryEdition
244
222
  end
245
223
  end
246
224
 
247
- def handle_figure(e)
225
+ def e_figure(e)
248
226
  "<p class='figure'>%s</p>" % traverse(e)
249
227
  end
250
228
 
251
- def handle_g(e, mode)
229
+ def e_g(e, mode)
252
230
  # if 有 <mapping type="unicode">
253
231
  # if 不在 Unicode Extension C, D, E 範圍裡
254
232
  # 直接採用
@@ -326,12 +304,12 @@ class CBETA::P5aToHTMLForEveryEdition
326
304
  "<a class='gaijiAnchor' href='##{gid}'>#{default}</a>"
327
305
  end
328
306
 
329
- def handle_graphic(e)
307
+ def e_graphic(e)
330
308
  url = File.basename(e['url'])
331
309
  "<span imgsrc='#{url}' class='graphic'></span>"
332
310
  end
333
311
 
334
- def handle_head(e)
312
+ def e_head(e)
335
313
  r = ''
336
314
  unless e['type'] == 'added'
337
315
  i = @open_divs.size
@@ -340,15 +318,15 @@ class CBETA::P5aToHTMLForEveryEdition
340
318
  r
341
319
  end
342
320
 
343
- def handle_item(e)
321
+ def e_item(e)
344
322
  "<li>%s</li>\n" % traverse(e)
345
323
  end
346
324
 
347
- def handle_juan(e)
325
+ def e_juan(e)
348
326
  "<p class='juan'>%s</p>" % traverse(e)
349
327
  end
350
328
 
351
- def handle_l(e)
329
+ def e_l(e)
352
330
  if @lg_type == 'abnormal'
353
331
  return traverse(e)
354
332
  end
@@ -380,13 +358,13 @@ class CBETA::P5aToHTMLForEveryEdition
380
358
  r
381
359
  end
382
360
 
383
- def handle_lb(e)
361
+ def e_lb(e)
384
362
  # 卍續藏有 X 跟 R 兩種 lb, 只處理 X
385
363
  return '' if e['ed'] != @series
386
364
 
387
365
  @char_count = 1
388
366
  @lb = e['n']
389
- line_head = @sutra_no + '_p' + e['n']
367
+ line_head = CBETA.get_linehead(@sutra_no, e['n'])
390
368
  r = ''
391
369
  #if e.parent.name == 'lg' and $lg_row_open
392
370
  if @lg_row_open && !@in_l
@@ -408,7 +386,7 @@ class CBETA::P5aToHTMLForEveryEdition
408
386
  r
409
387
  end
410
388
 
411
- def handle_lem(e)
389
+ def e_lem(e)
412
390
  r = ''
413
391
  content = traverse(e)
414
392
  wit = e['wit']
@@ -436,7 +414,7 @@ class CBETA::P5aToHTMLForEveryEdition
436
414
  r + ("<r w='#{w}' l='#{@lb}'>%s</r>" % content)
437
415
  end
438
416
 
439
- def handle_lg(e)
417
+ def e_lg(e)
440
418
  r = ''
441
419
  @lg_type = e['type']
442
420
  if @lg_type == 'abnormal'
@@ -461,11 +439,11 @@ class CBETA::P5aToHTMLForEveryEdition
461
439
  r
462
440
  end
463
441
 
464
- def handle_list(e)
442
+ def e_list(e)
465
443
  "<ul>%s</ul>" % traverse(e)
466
444
  end
467
445
 
468
- def handle_milestone(e)
446
+ def e_milestone(e)
469
447
  r = ''
470
448
  if e['unit'] == 'juan'
471
449
 
@@ -484,7 +462,7 @@ class CBETA::P5aToHTMLForEveryEdition
484
462
  r
485
463
  end
486
464
 
487
- def handle_mulu(e)
465
+ def e_mulu(e)
488
466
  r = ''
489
467
  if e['type'] == '品'
490
468
  @pass << false
@@ -493,49 +471,9 @@ class CBETA::P5aToHTMLForEveryEdition
493
471
  end
494
472
  r
495
473
  end
474
+
496
475
 
497
- def handle_node(e, mode)
498
- return '' if e.comment?
499
- return handle_text(e, mode) if e.text?
500
- return '' if PASS.include?(e.name)
501
- r = case e.name
502
- when 'anchor' then handle_anchor(e)
503
- when 'app' then handle_app(e)
504
- when 'byline' then handle_byline(e)
505
- when 'cell' then handle_cell(e)
506
- when 'corr' then handle_corr(e)
507
- when 'div' then handle_div(e)
508
- when 'figure' then handle_figure(e)
509
- when 'foreign' then ''
510
- when 'g' then handle_g(e, mode)
511
- when 'graphic' then handle_graphic(e)
512
- when 'head' then handle_head(e)
513
- when 'item' then handle_item(e)
514
- when 'juan' then handle_juan(e)
515
- when 'l' then handle_l(e)
516
- when 'lb' then handle_lb(e)
517
- when 'lem' then handle_lem(e)
518
- when 'lg' then handle_lg(e)
519
- when 'list' then handle_list(e)
520
- when 'mulu' then handle_mulu(e)
521
- when 'note' then handle_note(e)
522
- when 'milestone' then handle_milestone(e)
523
- when 'p' then handle_p(e)
524
- when 'rdg' then handle_rdg(e)
525
- when 'reg' then ''
526
- when 'row' then handle_row(e)
527
- when 'sic' then handle_sic(e)
528
- when 'sg' then handle_sg(e)
529
- when 't' then handle_t(e)
530
- when 'tt' then handle_tt(e)
531
- when 'table' then handle_table(e)
532
- when 'unclear' then handle_unclear(e)
533
- else traverse(e)
534
- end
535
- r
536
- end
537
-
538
- def handle_note(e)
476
+ def e_note(e)
539
477
  n = e['n']
540
478
  if e.has_attribute?('type')
541
479
  t = e['type']
@@ -580,36 +518,8 @@ class CBETA::P5aToHTMLForEveryEdition
580
518
  end
581
519
  end
582
520
 
583
- def handle_note_orig(e, anchor_type=nil)
584
- n = e['n']
585
- @pass << false
586
- s = traverse(e)
587
- @pass.pop
588
- @notes_orig[@juan][n] = s
589
- @notes_mod[@juan][n] = s
590
-
591
- c = @series
592
-
593
- # 如果 CBETA 沒有修訂,就跟底本的註一樣
594
- # 但是 CBETA 修訂後的編號,有時會加上 a, b
595
- # T01n0026, p. 506b07, 大正藏校勘 0506007, CBETA 拆為 0506007a, 0506007b
596
- c += " cb" unless @mod_notes.include?(n) or @mod_notes.include?(n+'a')
597
521
 
598
- label = case anchor_type
599
- when 'biao' then " data-label='標#{n[-2..-1]}'"
600
- when 'ke' then " data-label='科#{n[-2..-1]}'"
601
- else ''
602
- end
603
- s = "<a class='noteAnchor #{c}' href='#n#{n}'#{label}></a>"
604
- r = "<r w='#{@orig}'>#{s}</r>"
605
-
606
- unless @mod_notes.include?(n)
607
- r += "<r w='【CBETA】'>#{s}</r>"
608
- end
609
- r
610
- end
611
-
612
- def handle_p(e)
522
+ def e_p(e)
613
523
  if e.key? 'type'
614
524
  r = "<p class='%s'>" % e['type']
615
525
  else
@@ -620,24 +530,24 @@ class CBETA::P5aToHTMLForEveryEdition
620
530
  r + '</p>'
621
531
  end
622
532
 
623
- def handle_rdg(e)
533
+ def e_rdg(e)
624
534
  r = traverse(e)
625
535
  "<r w='#{e['wit']}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
626
536
  end
627
537
 
628
- def handle_row(e)
538
+ def e_row(e)
629
539
  "<div class='bip-table-row'>" + traverse(e) + "</div>"
630
540
  end
631
541
 
632
- def handle_sg(e)
542
+ def e_sg(e)
633
543
  '(' + traverse(e) + ')'
634
544
  end
635
545
 
636
- def handle_sic(e)
546
+ def e_sic(e)
637
547
  "<r w='#{@orig}' l='#{@lb}'>" + traverse(e) + "</r>"
638
548
  end
639
549
 
640
- def handle_t(e)
550
+ def e_t(e)
641
551
  if e.has_attribute? 'place'
642
552
  return '' if e['place'].include? 'foot'
643
553
  end
@@ -659,15 +569,112 @@ class CBETA::P5aToHTMLForEveryEdition
659
569
  end
660
570
  end
661
571
 
662
- def handle_tt(e)
572
+ def e_tt(e)
663
573
  @tt_type = e['type']
664
574
  traverse(e)
665
575
  end
666
576
 
667
- def handle_table(e)
577
+ def e_table(e)
668
578
  "<div class='bip-table'>" + traverse(e) + "</div>"
669
579
  end
580
+
581
+ def e_unclear(e)
582
+ '▆'
583
+ end
584
+
585
+ def filter_html(html, ed)
586
+ progress "filter html ed: #{ed}"
587
+ frag = Nokogiri::HTML.fragment(html)
588
+ frag.search("r").each do |node|
589
+ if node['w'].include? ed
590
+ html_only_this_edition = filter_html(node.inner_html, ed)
591
+ node.add_previous_sibling html_only_this_edition
592
+ end
593
+ node.remove
594
+ end
595
+ frag.to_html
596
+ end
597
+
598
+ def get_editions(doc)
599
+ r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
600
+ doc.xpath('//lem|//rdg').each do |e|
601
+ w = e['wit'].scan(/【.*?】/)
602
+ r.merge w
603
+ end
604
+ r
605
+ end
670
606
 
607
+
608
+ def handle_node(e, mode)
609
+ return '' if e.comment?
610
+ return handle_text(e, mode) if e.text?
611
+ return '' if PASS.include?(e.name)
612
+ r = case e.name
613
+ when 'anchor' then e_anchor(e)
614
+ when 'app' then e_app(e)
615
+ when 'byline' then e_byline(e)
616
+ when 'cell' then e_cell(e)
617
+ when 'corr' then e_corr(e)
618
+ when 'div' then e_div(e)
619
+ when 'figure' then e_figure(e)
620
+ when 'foreign' then ''
621
+ when 'g' then e_g(e, mode)
622
+ when 'graphic' then e_graphic(e)
623
+ when 'head' then e_head(e)
624
+ when 'item' then e_item(e)
625
+ when 'juan' then e_juan(e)
626
+ when 'l' then e_l(e)
627
+ when 'lb' then e_lb(e)
628
+ when 'lem' then e_lem(e)
629
+ when 'lg' then e_lg(e)
630
+ when 'list' then e_list(e)
631
+ when 'mulu' then e_mulu(e)
632
+ when 'note' then e_note(e)
633
+ when 'milestone' then e_milestone(e)
634
+ when 'p' then e_p(e)
635
+ when 'rdg' then e_rdg(e)
636
+ when 'reg' then ''
637
+ when 'row' then e_row(e)
638
+ when 'sic' then e_sic(e)
639
+ when 'sg' then e_sg(e)
640
+ when 't' then e_t(e)
641
+ when 'tt' then e_tt(e)
642
+ when 'table' then e_table(e)
643
+ when 'unclear' then e_unclear(e)
644
+ else traverse(e)
645
+ end
646
+ r
647
+ end
648
+
649
+ def handle_note_orig(e, anchor_type=nil)
650
+ n = e['n']
651
+ @pass << false
652
+ s = traverse(e)
653
+ @pass.pop
654
+ @notes_orig[@juan][n] = s
655
+ @notes_mod[@juan][n] = s
656
+
657
+ c = @series
658
+
659
+ # 如果 CBETA 沒有修訂,就跟底本的註一樣
660
+ # 但是 CBETA 修訂後的編號,有時會加上 a, b
661
+ # T01n0026, p. 506b07, 大正藏校勘 0506007, CBETA 拆為 0506007a, 0506007b
662
+ c += " cb" unless @mod_notes.include?(n) or @mod_notes.include?(n+'a')
663
+
664
+ label = case anchor_type
665
+ when 'biao' then " data-label='標#{n[-2..-1]}'"
666
+ when 'ke' then " data-label='科#{n[-2..-1]}'"
667
+ else ''
668
+ end
669
+ s = "<a class='noteAnchor #{c}' href='#n#{n}'#{label}></a>"
670
+ r = "<r w='#{@orig}'>#{s}</r>"
671
+
672
+ unless @mod_notes.include?(n)
673
+ r += "<r w='【CBETA】'>#{s}</r>"
674
+ end
675
+ r
676
+ end
677
+
671
678
  def handle_text(e, mode)
672
679
  s = e.content().chomp
673
680
  return '' if s.empty?
@@ -689,11 +696,8 @@ class CBETA::P5aToHTMLForEveryEdition
689
696
  r
690
697
  end
691
698
 
692
- def handle_unclear(e)
693
- '▆'
694
- end
695
-
696
699
  def html_back(juan_no, ed)
700
+ progress "html back, juan: #{juan_no}, ed: #{ed}"
697
701
  r = ''
698
702
  case ed
699
703
  when '【CBETA】'
@@ -843,6 +847,11 @@ class CBETA::P5aToHTMLForEveryEdition
843
847
  text = traverse(body)
844
848
  text
845
849
  end
850
+
851
+ def progress(msg)
852
+ puts Time.now.strftime("%Y-%m-%d %H:%M:%S")
853
+ puts msg
854
+ end
846
855
 
847
856
  def to_html(e)
848
857
  e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.22
4
+ version: 2.2.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-03 00:00:00.000000000 Z
11
+ date: 2016-12-12 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com