cbeta 2.2.22 → 2.2.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c051fafb22469b41ce99482cb6c7e42c2b57a934
4
- data.tar.gz: b844d9fd876dcda4236f3c0d74b21baa234fec5c
3
+ metadata.gz: d19eb4a198b323a6ee4601eee34ec46d56d41c50
4
+ data.tar.gz: 359ce18558763b5b65ac05682c4619d5fe106b7a
5
5
  SHA512:
6
- metadata.gz: 74c281e563811da223290c5cd35fc5246bae9f25267d634c553010ed3e9e7a9d7451d745b54408dbda05bec0218e1ac553fcb447b34c7ab37dd6bdbe806782cc
7
- data.tar.gz: 25a7190bde65648a11d5ea65f476b247a406c50736df9f66c1f9b90c12f7f14e3af7581023e4a397bd2fa4270134f9c9e66df257abed8bda7f752af8b8fdcaf7
6
+ metadata.gz: b25957a15c65e3e49cf6ef6f33a6e8bbec950cac63ebbb4a164286c55ce16080b4824d4b033773d5407d0b86fe9c0bc398eb5685ac7a09f9ee48c7d936b81cf4
7
+ data.tar.gz: 4af74f6871288f1bc15a8b858ccc03fd688cec5a6621578e9a38764c1f25f8dd3051f6088bd095e546d05f96ea336368b25aa2656384d06674a2e85036c08562
@@ -32,6 +32,16 @@ class CBETA
32
32
  vol.sub(/^(#{CANON}).*$/, '\1')
33
33
  end
34
34
 
35
+ # @param file_basename[String] XML檔主檔名, 例如 "T01n0001" 或 "T25n1510a"
36
+ # @param lb[String] 例如 "0001a01" 或 "0757b29"
37
+ # @return [String] CBETA 行首資訊,例如 "T01n0001_p0001a01" 或 "T25n1510ap0757b29"
38
+ def self.get_linehead(file_basename, lb)
39
+ r = file_basename
40
+ r += '_' if file_basename.match(/\d$/)
41
+ r += 'p' + lb
42
+ r
43
+ end
44
+
35
45
  # 由 冊號 及 典籍編號 取得 XML 主檔名
36
46
  # @param vol[String] 冊號, 例如 "T01" 或 "GA009"
37
47
  # @param work[String] 典籍編號, 例如 "T0001" 或 "GA0008"
@@ -307,7 +307,7 @@ class CBETA::P5aToHTML
307
307
 
308
308
  @char_count = 1
309
309
  @lb = e['n']
310
- line_head = @sutra_no + '_p' + @lb
310
+ line_head = CBETA.get_linehead(@sutra_no, @lb)
311
311
 
312
312
  r = ''
313
313
  #if e.parent.name == 'lg' and $lg_row_open
@@ -72,7 +72,7 @@ class CBETA::P5aToHTMLForEveryEdition
72
72
 
73
73
  def convert_all
74
74
  Dir.entries(@xml_root).sort.each do |c|
75
- next unless c.match(/^#{CBETA.CANON}$/)
75
+ next unless c.match(/^#{CBETA::CANON}$/)
76
76
  convert_canon(c)
77
77
  end
78
78
  end
@@ -139,29 +139,7 @@ class CBETA::P5aToHTMLForEveryEdition
139
139
  end
140
140
  end
141
141
 
142
-
143
- def filter_html(html, ed)
144
- frag = Nokogiri::HTML.fragment(html)
145
- frag.search("r").each do |node|
146
- if node['w'].include? ed
147
- html_only_this_edition = filter_html(node.inner_html, ed)
148
- node.add_previous_sibling html_only_this_edition
149
- end
150
- node.remove
151
- end
152
- frag.to_html
153
- end
154
-
155
- def get_editions(doc)
156
- r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
157
- doc.xpath('//lem|//rdg').each do |e|
158
- w = e['wit'].scan(/【.*?】/)
159
- r.merge w
160
- end
161
- r
162
- end
163
-
164
- def handle_anchor(e)
142
+ def e_anchor(e)
165
143
  id = e['id']
166
144
  if e.has_attribute?('id')
167
145
  if id.start_with?('nkr_note_orig')
@@ -184,7 +162,7 @@ class CBETA::P5aToHTMLForEveryEdition
184
162
  ''
185
163
  end
186
164
 
187
- def handle_app(e)
165
+ def e_app(e)
188
166
  r = ''
189
167
  if e['type'] == 'star'
190
168
  c = e['corresp'][1..-1]
@@ -193,14 +171,14 @@ class CBETA::P5aToHTMLForEveryEdition
193
171
  r + traverse(e)
194
172
  end
195
173
 
196
- def handle_byline(e)
174
+ def e_byline(e)
197
175
  r = '<p class="byline">'
198
176
  r += line_info
199
177
  r += traverse(e)
200
178
  r + '</p>'
201
179
  end
202
180
 
203
- def handle_cell(e)
181
+ def e_cell(e)
204
182
  doc = Nokogiri::XML::Document.new
205
183
  cell = doc.create_element('div')
206
184
  cell['class'] = 'bip-table-cell'
@@ -210,7 +188,7 @@ class CBETA::P5aToHTMLForEveryEdition
210
188
  to_html(cell)
211
189
  end
212
190
 
213
- def handle_corr(e)
191
+ def e_corr(e)
214
192
  r = ''
215
193
  if e.parent.name == 'choice'
216
194
  sic = e.parent.at_xpath('sic')
@@ -231,7 +209,7 @@ class CBETA::P5aToHTMLForEveryEdition
231
209
  r + "<r w='【CBETA】' l='#{@lb}'><span class='cbeta'>%s</span></r>" % traverse(e)
232
210
  end
233
211
 
234
- def handle_div(e)
212
+ def e_div(e)
235
213
  @div_count += 1
236
214
  n = @div_count
237
215
  if e.has_attribute? 'type'
@@ -244,11 +222,11 @@ class CBETA::P5aToHTMLForEveryEdition
244
222
  end
245
223
  end
246
224
 
247
- def handle_figure(e)
225
+ def e_figure(e)
248
226
  "<p class='figure'>%s</p>" % traverse(e)
249
227
  end
250
228
 
251
- def handle_g(e, mode)
229
+ def e_g(e, mode)
252
230
  # if 有 <mapping type="unicode">
253
231
  # if 不在 Unicode Extension C, D, E 範圍裡
254
232
  # 直接採用
@@ -326,12 +304,12 @@ class CBETA::P5aToHTMLForEveryEdition
326
304
  "<a class='gaijiAnchor' href='##{gid}'>#{default}</a>"
327
305
  end
328
306
 
329
- def handle_graphic(e)
307
+ def e_graphic(e)
330
308
  url = File.basename(e['url'])
331
309
  "<span imgsrc='#{url}' class='graphic'></span>"
332
310
  end
333
311
 
334
- def handle_head(e)
312
+ def e_head(e)
335
313
  r = ''
336
314
  unless e['type'] == 'added'
337
315
  i = @open_divs.size
@@ -340,15 +318,15 @@ class CBETA::P5aToHTMLForEveryEdition
340
318
  r
341
319
  end
342
320
 
343
- def handle_item(e)
321
+ def e_item(e)
344
322
  "<li>%s</li>\n" % traverse(e)
345
323
  end
346
324
 
347
- def handle_juan(e)
325
+ def e_juan(e)
348
326
  "<p class='juan'>%s</p>" % traverse(e)
349
327
  end
350
328
 
351
- def handle_l(e)
329
+ def e_l(e)
352
330
  if @lg_type == 'abnormal'
353
331
  return traverse(e)
354
332
  end
@@ -380,13 +358,13 @@ class CBETA::P5aToHTMLForEveryEdition
380
358
  r
381
359
  end
382
360
 
383
- def handle_lb(e)
361
+ def e_lb(e)
384
362
  # 卍續藏有 X 跟 R 兩種 lb, 只處理 X
385
363
  return '' if e['ed'] != @series
386
364
 
387
365
  @char_count = 1
388
366
  @lb = e['n']
389
- line_head = @sutra_no + '_p' + e['n']
367
+ line_head = CBETA.get_linehead(@sutra_no, e['n'])
390
368
  r = ''
391
369
  #if e.parent.name == 'lg' and $lg_row_open
392
370
  if @lg_row_open && !@in_l
@@ -408,7 +386,7 @@ class CBETA::P5aToHTMLForEveryEdition
408
386
  r
409
387
  end
410
388
 
411
- def handle_lem(e)
389
+ def e_lem(e)
412
390
  r = ''
413
391
  content = traverse(e)
414
392
  wit = e['wit']
@@ -436,7 +414,7 @@ class CBETA::P5aToHTMLForEveryEdition
436
414
  r + ("<r w='#{w}' l='#{@lb}'>%s</r>" % content)
437
415
  end
438
416
 
439
- def handle_lg(e)
417
+ def e_lg(e)
440
418
  r = ''
441
419
  @lg_type = e['type']
442
420
  if @lg_type == 'abnormal'
@@ -461,11 +439,11 @@ class CBETA::P5aToHTMLForEveryEdition
461
439
  r
462
440
  end
463
441
 
464
- def handle_list(e)
442
+ def e_list(e)
465
443
  "<ul>%s</ul>" % traverse(e)
466
444
  end
467
445
 
468
- def handle_milestone(e)
446
+ def e_milestone(e)
469
447
  r = ''
470
448
  if e['unit'] == 'juan'
471
449
 
@@ -484,7 +462,7 @@ class CBETA::P5aToHTMLForEveryEdition
484
462
  r
485
463
  end
486
464
 
487
- def handle_mulu(e)
465
+ def e_mulu(e)
488
466
  r = ''
489
467
  if e['type'] == '品'
490
468
  @pass << false
@@ -493,49 +471,9 @@ class CBETA::P5aToHTMLForEveryEdition
493
471
  end
494
472
  r
495
473
  end
474
+
496
475
 
497
- def handle_node(e, mode)
498
- return '' if e.comment?
499
- return handle_text(e, mode) if e.text?
500
- return '' if PASS.include?(e.name)
501
- r = case e.name
502
- when 'anchor' then handle_anchor(e)
503
- when 'app' then handle_app(e)
504
- when 'byline' then handle_byline(e)
505
- when 'cell' then handle_cell(e)
506
- when 'corr' then handle_corr(e)
507
- when 'div' then handle_div(e)
508
- when 'figure' then handle_figure(e)
509
- when 'foreign' then ''
510
- when 'g' then handle_g(e, mode)
511
- when 'graphic' then handle_graphic(e)
512
- when 'head' then handle_head(e)
513
- when 'item' then handle_item(e)
514
- when 'juan' then handle_juan(e)
515
- when 'l' then handle_l(e)
516
- when 'lb' then handle_lb(e)
517
- when 'lem' then handle_lem(e)
518
- when 'lg' then handle_lg(e)
519
- when 'list' then handle_list(e)
520
- when 'mulu' then handle_mulu(e)
521
- when 'note' then handle_note(e)
522
- when 'milestone' then handle_milestone(e)
523
- when 'p' then handle_p(e)
524
- when 'rdg' then handle_rdg(e)
525
- when 'reg' then ''
526
- when 'row' then handle_row(e)
527
- when 'sic' then handle_sic(e)
528
- when 'sg' then handle_sg(e)
529
- when 't' then handle_t(e)
530
- when 'tt' then handle_tt(e)
531
- when 'table' then handle_table(e)
532
- when 'unclear' then handle_unclear(e)
533
- else traverse(e)
534
- end
535
- r
536
- end
537
-
538
- def handle_note(e)
476
+ def e_note(e)
539
477
  n = e['n']
540
478
  if e.has_attribute?('type')
541
479
  t = e['type']
@@ -580,36 +518,8 @@ class CBETA::P5aToHTMLForEveryEdition
580
518
  end
581
519
  end
582
520
 
583
- def handle_note_orig(e, anchor_type=nil)
584
- n = e['n']
585
- @pass << false
586
- s = traverse(e)
587
- @pass.pop
588
- @notes_orig[@juan][n] = s
589
- @notes_mod[@juan][n] = s
590
-
591
- c = @series
592
-
593
- # 如果 CBETA 沒有修訂,就跟底本的註一樣
594
- # 但是 CBETA 修訂後的編號,有時會加上 a, b
595
- # T01n0026, p. 506b07, 大正藏校勘 0506007, CBETA 拆為 0506007a, 0506007b
596
- c += " cb" unless @mod_notes.include?(n) or @mod_notes.include?(n+'a')
597
521
 
598
- label = case anchor_type
599
- when 'biao' then " data-label='標#{n[-2..-1]}'"
600
- when 'ke' then " data-label='科#{n[-2..-1]}'"
601
- else ''
602
- end
603
- s = "<a class='noteAnchor #{c}' href='#n#{n}'#{label}></a>"
604
- r = "<r w='#{@orig}'>#{s}</r>"
605
-
606
- unless @mod_notes.include?(n)
607
- r += "<r w='【CBETA】'>#{s}</r>"
608
- end
609
- r
610
- end
611
-
612
- def handle_p(e)
522
+ def e_p(e)
613
523
  if e.key? 'type'
614
524
  r = "<p class='%s'>" % e['type']
615
525
  else
@@ -620,24 +530,24 @@ class CBETA::P5aToHTMLForEveryEdition
620
530
  r + '</p>'
621
531
  end
622
532
 
623
- def handle_rdg(e)
533
+ def e_rdg(e)
624
534
  r = traverse(e)
625
535
  "<r w='#{e['wit']}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
626
536
  end
627
537
 
628
- def handle_row(e)
538
+ def e_row(e)
629
539
  "<div class='bip-table-row'>" + traverse(e) + "</div>"
630
540
  end
631
541
 
632
- def handle_sg(e)
542
+ def e_sg(e)
633
543
  '(' + traverse(e) + ')'
634
544
  end
635
545
 
636
- def handle_sic(e)
546
+ def e_sic(e)
637
547
  "<r w='#{@orig}' l='#{@lb}'>" + traverse(e) + "</r>"
638
548
  end
639
549
 
640
- def handle_t(e)
550
+ def e_t(e)
641
551
  if e.has_attribute? 'place'
642
552
  return '' if e['place'].include? 'foot'
643
553
  end
@@ -659,15 +569,112 @@ class CBETA::P5aToHTMLForEveryEdition
659
569
  end
660
570
  end
661
571
 
662
- def handle_tt(e)
572
+ def e_tt(e)
663
573
  @tt_type = e['type']
664
574
  traverse(e)
665
575
  end
666
576
 
667
- def handle_table(e)
577
+ def e_table(e)
668
578
  "<div class='bip-table'>" + traverse(e) + "</div>"
669
579
  end
580
+
581
+ def e_unclear(e)
582
+ '▆'
583
+ end
584
+
585
+ def filter_html(html, ed)
586
+ progress "filter html ed: #{ed}"
587
+ frag = Nokogiri::HTML.fragment(html)
588
+ frag.search("r").each do |node|
589
+ if node['w'].include? ed
590
+ html_only_this_edition = filter_html(node.inner_html, ed)
591
+ node.add_previous_sibling html_only_this_edition
592
+ end
593
+ node.remove
594
+ end
595
+ frag.to_html
596
+ end
597
+
598
+ def get_editions(doc)
599
+ r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
600
+ doc.xpath('//lem|//rdg').each do |e|
601
+ w = e['wit'].scan(/【.*?】/)
602
+ r.merge w
603
+ end
604
+ r
605
+ end
670
606
 
607
+
608
+ def handle_node(e, mode)
609
+ return '' if e.comment?
610
+ return handle_text(e, mode) if e.text?
611
+ return '' if PASS.include?(e.name)
612
+ r = case e.name
613
+ when 'anchor' then e_anchor(e)
614
+ when 'app' then e_app(e)
615
+ when 'byline' then e_byline(e)
616
+ when 'cell' then e_cell(e)
617
+ when 'corr' then e_corr(e)
618
+ when 'div' then e_div(e)
619
+ when 'figure' then e_figure(e)
620
+ when 'foreign' then ''
621
+ when 'g' then e_g(e, mode)
622
+ when 'graphic' then e_graphic(e)
623
+ when 'head' then e_head(e)
624
+ when 'item' then e_item(e)
625
+ when 'juan' then e_juan(e)
626
+ when 'l' then e_l(e)
627
+ when 'lb' then e_lb(e)
628
+ when 'lem' then e_lem(e)
629
+ when 'lg' then e_lg(e)
630
+ when 'list' then e_list(e)
631
+ when 'mulu' then e_mulu(e)
632
+ when 'note' then e_note(e)
633
+ when 'milestone' then e_milestone(e)
634
+ when 'p' then e_p(e)
635
+ when 'rdg' then e_rdg(e)
636
+ when 'reg' then ''
637
+ when 'row' then e_row(e)
638
+ when 'sic' then e_sic(e)
639
+ when 'sg' then e_sg(e)
640
+ when 't' then e_t(e)
641
+ when 'tt' then e_tt(e)
642
+ when 'table' then e_table(e)
643
+ when 'unclear' then e_unclear(e)
644
+ else traverse(e)
645
+ end
646
+ r
647
+ end
648
+
649
+ def handle_note_orig(e, anchor_type=nil)
650
+ n = e['n']
651
+ @pass << false
652
+ s = traverse(e)
653
+ @pass.pop
654
+ @notes_orig[@juan][n] = s
655
+ @notes_mod[@juan][n] = s
656
+
657
+ c = @series
658
+
659
+ # 如果 CBETA 沒有修訂,就跟底本的註一樣
660
+ # 但是 CBETA 修訂後的編號,有時會加上 a, b
661
+ # T01n0026, p. 506b07, 大正藏校勘 0506007, CBETA 拆為 0506007a, 0506007b
662
+ c += " cb" unless @mod_notes.include?(n) or @mod_notes.include?(n+'a')
663
+
664
+ label = case anchor_type
665
+ when 'biao' then " data-label='標#{n[-2..-1]}'"
666
+ when 'ke' then " data-label='科#{n[-2..-1]}'"
667
+ else ''
668
+ end
669
+ s = "<a class='noteAnchor #{c}' href='#n#{n}'#{label}></a>"
670
+ r = "<r w='#{@orig}'>#{s}</r>"
671
+
672
+ unless @mod_notes.include?(n)
673
+ r += "<r w='【CBETA】'>#{s}</r>"
674
+ end
675
+ r
676
+ end
677
+
671
678
  def handle_text(e, mode)
672
679
  s = e.content().chomp
673
680
  return '' if s.empty?
@@ -689,11 +696,8 @@ class CBETA::P5aToHTMLForEveryEdition
689
696
  r
690
697
  end
691
698
 
692
- def handle_unclear(e)
693
- '▆'
694
- end
695
-
696
699
  def html_back(juan_no, ed)
700
+ progress "html back, juan: #{juan_no}, ed: #{ed}"
697
701
  r = ''
698
702
  case ed
699
703
  when '【CBETA】'
@@ -843,6 +847,11 @@ class CBETA::P5aToHTMLForEveryEdition
843
847
  text = traverse(body)
844
848
  text
845
849
  end
850
+
851
+ def progress(msg)
852
+ puts Time.now.strftime("%Y-%m-%d %H:%M:%S")
853
+ puts msg
854
+ end
846
855
 
847
856
  def to_html(e)
848
857
  e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.22
4
+ version: 2.2.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-03 00:00:00.000000000 Z
11
+ date: 2016-12-12 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com