cbeta 2.2.22 → 2.2.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cbeta.rb +10 -0
- data/lib/cbeta/p5a_to_html.rb +1 -1
- data/lib/cbeta/p5a_to_html_for_every_edition.rb +134 -125
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d19eb4a198b323a6ee4601eee34ec46d56d41c50
|
4
|
+
data.tar.gz: 359ce18558763b5b65ac05682c4619d5fe106b7a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b25957a15c65e3e49cf6ef6f33a6e8bbec950cac63ebbb4a164286c55ce16080b4824d4b033773d5407d0b86fe9c0bc398eb5685ac7a09f9ee48c7d936b81cf4
|
7
|
+
data.tar.gz: 4af74f6871288f1bc15a8b858ccc03fd688cec5a6621578e9a38764c1f25f8dd3051f6088bd095e546d05f96ea336368b25aa2656384d06674a2e85036c08562
|
data/lib/cbeta.rb
CHANGED
@@ -32,6 +32,16 @@ class CBETA
|
|
32
32
|
vol.sub(/^(#{CANON}).*$/, '\1')
|
33
33
|
end
|
34
34
|
|
35
|
+
# @param file_basename[String] XML檔主檔名, 例如 "T01n0001" 或 "T25n1510a"
|
36
|
+
# @param lb[String] 例如 "0001a01" 或 "0757b29"
|
37
|
+
# @return [String] CBETA 行首資訊,例如 "T01n0001_p0001a01" 或 "T25n1510ap0757b29"
|
38
|
+
def self.get_linehead(file_basename, lb)
|
39
|
+
r = file_basename
|
40
|
+
r += '_' if file_basename.match(/\d$/)
|
41
|
+
r += 'p' + lb
|
42
|
+
r
|
43
|
+
end
|
44
|
+
|
35
45
|
# 由 冊號 及 典籍編號 取得 XML 主檔名
|
36
46
|
# @param vol[String] 冊號, 例如 "T01" 或 "GA009"
|
37
47
|
# @param work[String] 典籍編號, 例如 "T0001" 或 "GA0008"
|
data/lib/cbeta/p5a_to_html.rb
CHANGED
@@ -72,7 +72,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
72
72
|
|
73
73
|
def convert_all
|
74
74
|
Dir.entries(@xml_root).sort.each do |c|
|
75
|
-
next unless c.match(/^#{CBETA
|
75
|
+
next unless c.match(/^#{CBETA::CANON}$/)
|
76
76
|
convert_canon(c)
|
77
77
|
end
|
78
78
|
end
|
@@ -139,29 +139,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
139
139
|
end
|
140
140
|
end
|
141
141
|
|
142
|
-
|
143
|
-
def filter_html(html, ed)
|
144
|
-
frag = Nokogiri::HTML.fragment(html)
|
145
|
-
frag.search("r").each do |node|
|
146
|
-
if node['w'].include? ed
|
147
|
-
html_only_this_edition = filter_html(node.inner_html, ed)
|
148
|
-
node.add_previous_sibling html_only_this_edition
|
149
|
-
end
|
150
|
-
node.remove
|
151
|
-
end
|
152
|
-
frag.to_html
|
153
|
-
end
|
154
|
-
|
155
|
-
def get_editions(doc)
|
156
|
-
r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
|
157
|
-
doc.xpath('//lem|//rdg').each do |e|
|
158
|
-
w = e['wit'].scan(/【.*?】/)
|
159
|
-
r.merge w
|
160
|
-
end
|
161
|
-
r
|
162
|
-
end
|
163
|
-
|
164
|
-
def handle_anchor(e)
|
142
|
+
def e_anchor(e)
|
165
143
|
id = e['id']
|
166
144
|
if e.has_attribute?('id')
|
167
145
|
if id.start_with?('nkr_note_orig')
|
@@ -184,7 +162,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
184
162
|
''
|
185
163
|
end
|
186
164
|
|
187
|
-
def
|
165
|
+
def e_app(e)
|
188
166
|
r = ''
|
189
167
|
if e['type'] == 'star'
|
190
168
|
c = e['corresp'][1..-1]
|
@@ -193,14 +171,14 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
193
171
|
r + traverse(e)
|
194
172
|
end
|
195
173
|
|
196
|
-
def
|
174
|
+
def e_byline(e)
|
197
175
|
r = '<p class="byline">'
|
198
176
|
r += line_info
|
199
177
|
r += traverse(e)
|
200
178
|
r + '</p>'
|
201
179
|
end
|
202
180
|
|
203
|
-
def
|
181
|
+
def e_cell(e)
|
204
182
|
doc = Nokogiri::XML::Document.new
|
205
183
|
cell = doc.create_element('div')
|
206
184
|
cell['class'] = 'bip-table-cell'
|
@@ -210,7 +188,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
210
188
|
to_html(cell)
|
211
189
|
end
|
212
190
|
|
213
|
-
def
|
191
|
+
def e_corr(e)
|
214
192
|
r = ''
|
215
193
|
if e.parent.name == 'choice'
|
216
194
|
sic = e.parent.at_xpath('sic')
|
@@ -231,7 +209,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
231
209
|
r + "<r w='【CBETA】' l='#{@lb}'><span class='cbeta'>%s</span></r>" % traverse(e)
|
232
210
|
end
|
233
211
|
|
234
|
-
def
|
212
|
+
def e_div(e)
|
235
213
|
@div_count += 1
|
236
214
|
n = @div_count
|
237
215
|
if e.has_attribute? 'type'
|
@@ -244,11 +222,11 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
244
222
|
end
|
245
223
|
end
|
246
224
|
|
247
|
-
def
|
225
|
+
def e_figure(e)
|
248
226
|
"<p class='figure'>%s</p>" % traverse(e)
|
249
227
|
end
|
250
228
|
|
251
|
-
def
|
229
|
+
def e_g(e, mode)
|
252
230
|
# if 有 <mapping type="unicode">
|
253
231
|
# if 不在 Unicode Extension C, D, E 範圍裡
|
254
232
|
# 直接採用
|
@@ -326,12 +304,12 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
326
304
|
"<a class='gaijiAnchor' href='##{gid}'>#{default}</a>"
|
327
305
|
end
|
328
306
|
|
329
|
-
def
|
307
|
+
def e_graphic(e)
|
330
308
|
url = File.basename(e['url'])
|
331
309
|
"<span imgsrc='#{url}' class='graphic'></span>"
|
332
310
|
end
|
333
311
|
|
334
|
-
def
|
312
|
+
def e_head(e)
|
335
313
|
r = ''
|
336
314
|
unless e['type'] == 'added'
|
337
315
|
i = @open_divs.size
|
@@ -340,15 +318,15 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
340
318
|
r
|
341
319
|
end
|
342
320
|
|
343
|
-
def
|
321
|
+
def e_item(e)
|
344
322
|
"<li>%s</li>\n" % traverse(e)
|
345
323
|
end
|
346
324
|
|
347
|
-
def
|
325
|
+
def e_juan(e)
|
348
326
|
"<p class='juan'>%s</p>" % traverse(e)
|
349
327
|
end
|
350
328
|
|
351
|
-
def
|
329
|
+
def e_l(e)
|
352
330
|
if @lg_type == 'abnormal'
|
353
331
|
return traverse(e)
|
354
332
|
end
|
@@ -380,13 +358,13 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
380
358
|
r
|
381
359
|
end
|
382
360
|
|
383
|
-
def
|
361
|
+
def e_lb(e)
|
384
362
|
# 卍續藏有 X 跟 R 兩種 lb, 只處理 X
|
385
363
|
return '' if e['ed'] != @series
|
386
364
|
|
387
365
|
@char_count = 1
|
388
366
|
@lb = e['n']
|
389
|
-
line_head = @sutra_no
|
367
|
+
line_head = CBETA.get_linehead(@sutra_no, e['n'])
|
390
368
|
r = ''
|
391
369
|
#if e.parent.name == 'lg' and $lg_row_open
|
392
370
|
if @lg_row_open && !@in_l
|
@@ -408,7 +386,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
408
386
|
r
|
409
387
|
end
|
410
388
|
|
411
|
-
def
|
389
|
+
def e_lem(e)
|
412
390
|
r = ''
|
413
391
|
content = traverse(e)
|
414
392
|
wit = e['wit']
|
@@ -436,7 +414,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
436
414
|
r + ("<r w='#{w}' l='#{@lb}'>%s</r>" % content)
|
437
415
|
end
|
438
416
|
|
439
|
-
def
|
417
|
+
def e_lg(e)
|
440
418
|
r = ''
|
441
419
|
@lg_type = e['type']
|
442
420
|
if @lg_type == 'abnormal'
|
@@ -461,11 +439,11 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
461
439
|
r
|
462
440
|
end
|
463
441
|
|
464
|
-
def
|
442
|
+
def e_list(e)
|
465
443
|
"<ul>%s</ul>" % traverse(e)
|
466
444
|
end
|
467
445
|
|
468
|
-
def
|
446
|
+
def e_milestone(e)
|
469
447
|
r = ''
|
470
448
|
if e['unit'] == 'juan'
|
471
449
|
|
@@ -484,7 +462,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
484
462
|
r
|
485
463
|
end
|
486
464
|
|
487
|
-
def
|
465
|
+
def e_mulu(e)
|
488
466
|
r = ''
|
489
467
|
if e['type'] == '品'
|
490
468
|
@pass << false
|
@@ -493,49 +471,9 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
493
471
|
end
|
494
472
|
r
|
495
473
|
end
|
474
|
+
|
496
475
|
|
497
|
-
def
|
498
|
-
return '' if e.comment?
|
499
|
-
return handle_text(e, mode) if e.text?
|
500
|
-
return '' if PASS.include?(e.name)
|
501
|
-
r = case e.name
|
502
|
-
when 'anchor' then handle_anchor(e)
|
503
|
-
when 'app' then handle_app(e)
|
504
|
-
when 'byline' then handle_byline(e)
|
505
|
-
when 'cell' then handle_cell(e)
|
506
|
-
when 'corr' then handle_corr(e)
|
507
|
-
when 'div' then handle_div(e)
|
508
|
-
when 'figure' then handle_figure(e)
|
509
|
-
when 'foreign' then ''
|
510
|
-
when 'g' then handle_g(e, mode)
|
511
|
-
when 'graphic' then handle_graphic(e)
|
512
|
-
when 'head' then handle_head(e)
|
513
|
-
when 'item' then handle_item(e)
|
514
|
-
when 'juan' then handle_juan(e)
|
515
|
-
when 'l' then handle_l(e)
|
516
|
-
when 'lb' then handle_lb(e)
|
517
|
-
when 'lem' then handle_lem(e)
|
518
|
-
when 'lg' then handle_lg(e)
|
519
|
-
when 'list' then handle_list(e)
|
520
|
-
when 'mulu' then handle_mulu(e)
|
521
|
-
when 'note' then handle_note(e)
|
522
|
-
when 'milestone' then handle_milestone(e)
|
523
|
-
when 'p' then handle_p(e)
|
524
|
-
when 'rdg' then handle_rdg(e)
|
525
|
-
when 'reg' then ''
|
526
|
-
when 'row' then handle_row(e)
|
527
|
-
when 'sic' then handle_sic(e)
|
528
|
-
when 'sg' then handle_sg(e)
|
529
|
-
when 't' then handle_t(e)
|
530
|
-
when 'tt' then handle_tt(e)
|
531
|
-
when 'table' then handle_table(e)
|
532
|
-
when 'unclear' then handle_unclear(e)
|
533
|
-
else traverse(e)
|
534
|
-
end
|
535
|
-
r
|
536
|
-
end
|
537
|
-
|
538
|
-
def handle_note(e)
|
476
|
+
def e_note(e)
|
539
477
|
n = e['n']
|
540
478
|
if e.has_attribute?('type')
|
541
479
|
t = e['type']
|
@@ -580,36 +518,8 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
580
518
|
end
|
581
519
|
end
|
582
520
|
|
583
|
-
def handle_note_orig(e, anchor_type=nil)
|
584
|
-
n = e['n']
|
585
|
-
@pass << false
|
586
|
-
s = traverse(e)
|
587
|
-
@pass.pop
|
588
|
-
@notes_orig[@juan][n] = s
|
589
|
-
@notes_mod[@juan][n] = s
|
590
|
-
|
591
|
-
c = @series
|
592
|
-
|
593
|
-
# 如果 CBETA 沒有修訂,就跟底本的註一樣
|
594
|
-
# 但是 CBETA 修訂後的編號,有時會加上 a, b
|
595
|
-
# T01n0026, p. 506b07, 大正藏校勘 0506007, CBETA 拆為 0506007a, 0506007b
|
596
|
-
c += " cb" unless @mod_notes.include?(n) or @mod_notes.include?(n+'a')
|
597
521
|
|
598
|
-
|
599
|
-
when 'biao' then " data-label='標#{n[-2..-1]}'"
|
600
|
-
when 'ke' then " data-label='科#{n[-2..-1]}'"
|
601
|
-
else ''
|
602
|
-
end
|
603
|
-
s = "<a class='noteAnchor #{c}' href='#n#{n}'#{label}></a>"
|
604
|
-
r = "<r w='#{@orig}'>#{s}</r>"
|
605
|
-
|
606
|
-
unless @mod_notes.include?(n)
|
607
|
-
r += "<r w='【CBETA】'>#{s}</r>"
|
608
|
-
end
|
609
|
-
r
|
610
|
-
end
|
611
|
-
|
612
|
-
def handle_p(e)
|
522
|
+
def e_p(e)
|
613
523
|
if e.key? 'type'
|
614
524
|
r = "<p class='%s'>" % e['type']
|
615
525
|
else
|
@@ -620,24 +530,24 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
620
530
|
r + '</p>'
|
621
531
|
end
|
622
532
|
|
623
|
-
def
|
533
|
+
def e_rdg(e)
|
624
534
|
r = traverse(e)
|
625
535
|
"<r w='#{e['wit']}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
|
626
536
|
end
|
627
537
|
|
628
|
-
def
|
538
|
+
def e_row(e)
|
629
539
|
"<div class='bip-table-row'>" + traverse(e) + "</div>"
|
630
540
|
end
|
631
541
|
|
632
|
-
def
|
542
|
+
def e_sg(e)
|
633
543
|
'(' + traverse(e) + ')'
|
634
544
|
end
|
635
545
|
|
636
|
-
def
|
546
|
+
def e_sic(e)
|
637
547
|
"<r w='#{@orig}' l='#{@lb}'>" + traverse(e) + "</r>"
|
638
548
|
end
|
639
549
|
|
640
|
-
def
|
550
|
+
def e_t(e)
|
641
551
|
if e.has_attribute? 'place'
|
642
552
|
return '' if e['place'].include? 'foot'
|
643
553
|
end
|
@@ -659,15 +569,112 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
659
569
|
end
|
660
570
|
end
|
661
571
|
|
662
|
-
def
|
572
|
+
def e_tt(e)
|
663
573
|
@tt_type = e['type']
|
664
574
|
traverse(e)
|
665
575
|
end
|
666
576
|
|
667
|
-
def
|
577
|
+
def e_table(e)
|
668
578
|
"<div class='bip-table'>" + traverse(e) + "</div>"
|
669
579
|
end
|
580
|
+
|
581
|
+
def e_unclear(e)
|
582
|
+
'▆'
|
583
|
+
end
|
584
|
+
|
585
|
+
def filter_html(html, ed)
|
586
|
+
progress "filter html ed: #{ed}"
|
587
|
+
frag = Nokogiri::HTML.fragment(html)
|
588
|
+
frag.search("r").each do |node|
|
589
|
+
if node['w'].include? ed
|
590
|
+
html_only_this_edition = filter_html(node.inner_html, ed)
|
591
|
+
node.add_previous_sibling html_only_this_edition
|
592
|
+
end
|
593
|
+
node.remove
|
594
|
+
end
|
595
|
+
frag.to_html
|
596
|
+
end
|
597
|
+
|
598
|
+
def get_editions(doc)
|
599
|
+
r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
|
600
|
+
doc.xpath('//lem|//rdg').each do |e|
|
601
|
+
w = e['wit'].scan(/【.*?】/)
|
602
|
+
r.merge w
|
603
|
+
end
|
604
|
+
r
|
605
|
+
end
|
670
606
|
|
607
|
+
|
608
|
+
def handle_node(e, mode)
|
609
|
+
return '' if e.comment?
|
610
|
+
return handle_text(e, mode) if e.text?
|
611
|
+
return '' if PASS.include?(e.name)
|
612
|
+
r = case e.name
|
613
|
+
when 'anchor' then e_anchor(e)
|
614
|
+
when 'app' then e_app(e)
|
615
|
+
when 'byline' then e_byline(e)
|
616
|
+
when 'cell' then e_cell(e)
|
617
|
+
when 'corr' then e_corr(e)
|
618
|
+
when 'div' then e_div(e)
|
619
|
+
when 'figure' then e_figure(e)
|
620
|
+
when 'foreign' then ''
|
621
|
+
when 'g' then e_g(e, mode)
|
622
|
+
when 'graphic' then e_graphic(e)
|
623
|
+
when 'head' then e_head(e)
|
624
|
+
when 'item' then e_item(e)
|
625
|
+
when 'juan' then e_juan(e)
|
626
|
+
when 'l' then e_l(e)
|
627
|
+
when 'lb' then e_lb(e)
|
628
|
+
when 'lem' then e_lem(e)
|
629
|
+
when 'lg' then e_lg(e)
|
630
|
+
when 'list' then e_list(e)
|
631
|
+
when 'mulu' then e_mulu(e)
|
632
|
+
when 'note' then e_note(e)
|
633
|
+
when 'milestone' then e_milestone(e)
|
634
|
+
when 'p' then e_p(e)
|
635
|
+
when 'rdg' then e_rdg(e)
|
636
|
+
when 'reg' then ''
|
637
|
+
when 'row' then e_row(e)
|
638
|
+
when 'sic' then e_sic(e)
|
639
|
+
when 'sg' then e_sg(e)
|
640
|
+
when 't' then e_t(e)
|
641
|
+
when 'tt' then e_tt(e)
|
642
|
+
when 'table' then e_table(e)
|
643
|
+
when 'unclear' then e_unclear(e)
|
644
|
+
else traverse(e)
|
645
|
+
end
|
646
|
+
r
|
647
|
+
end
|
648
|
+
|
649
|
+
def handle_note_orig(e, anchor_type=nil)
|
650
|
+
n = e['n']
|
651
|
+
@pass << false
|
652
|
+
s = traverse(e)
|
653
|
+
@pass.pop
|
654
|
+
@notes_orig[@juan][n] = s
|
655
|
+
@notes_mod[@juan][n] = s
|
656
|
+
|
657
|
+
c = @series
|
658
|
+
|
659
|
+
# 如果 CBETA 沒有修訂,就跟底本的註一樣
|
660
|
+
# 但是 CBETA 修訂後的編號,有時會加上 a, b
|
661
|
+
# T01n0026, p. 506b07, 大正藏校勘 0506007, CBETA 拆為 0506007a, 0506007b
|
662
|
+
c += " cb" unless @mod_notes.include?(n) or @mod_notes.include?(n+'a')
|
663
|
+
|
664
|
+
label = case anchor_type
|
665
|
+
when 'biao' then " data-label='標#{n[-2..-1]}'"
|
666
|
+
when 'ke' then " data-label='科#{n[-2..-1]}'"
|
667
|
+
else ''
|
668
|
+
end
|
669
|
+
s = "<a class='noteAnchor #{c}' href='#n#{n}'#{label}></a>"
|
670
|
+
r = "<r w='#{@orig}'>#{s}</r>"
|
671
|
+
|
672
|
+
unless @mod_notes.include?(n)
|
673
|
+
r += "<r w='【CBETA】'>#{s}</r>"
|
674
|
+
end
|
675
|
+
r
|
676
|
+
end
|
677
|
+
|
671
678
|
def handle_text(e, mode)
|
672
679
|
s = e.content().chomp
|
673
680
|
return '' if s.empty?
|
@@ -689,11 +696,8 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
689
696
|
r
|
690
697
|
end
|
691
698
|
|
692
|
-
def handle_unclear(e)
|
693
|
-
'▆'
|
694
|
-
end
|
695
|
-
|
696
699
|
def html_back(juan_no, ed)
|
700
|
+
progress "html back, juan: #{juan_no}, ed: #{ed}"
|
697
701
|
r = ''
|
698
702
|
case ed
|
699
703
|
when '【CBETA】'
|
@@ -843,6 +847,11 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
843
847
|
text = traverse(body)
|
844
848
|
text
|
845
849
|
end
|
850
|
+
|
851
|
+
def progress(msg)
|
852
|
+
puts Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
853
|
+
puts msg
|
854
|
+
end
|
846
855
|
|
847
856
|
def to_html(e)
|
848
857
|
e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.
|
4
|
+
version: 2.2.26
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-12-
|
11
|
+
date: 2016-12-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
|
14
14
|
email: zhoubx@gmail.com
|