cbeta 2.2.22 → 2.2.26
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cbeta.rb +10 -0
- data/lib/cbeta/p5a_to_html.rb +1 -1
- data/lib/cbeta/p5a_to_html_for_every_edition.rb +134 -125
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d19eb4a198b323a6ee4601eee34ec46d56d41c50
|
4
|
+
data.tar.gz: 359ce18558763b5b65ac05682c4619d5fe106b7a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b25957a15c65e3e49cf6ef6f33a6e8bbec950cac63ebbb4a164286c55ce16080b4824d4b033773d5407d0b86fe9c0bc398eb5685ac7a09f9ee48c7d936b81cf4
|
7
|
+
data.tar.gz: 4af74f6871288f1bc15a8b858ccc03fd688cec5a6621578e9a38764c1f25f8dd3051f6088bd095e546d05f96ea336368b25aa2656384d06674a2e85036c08562
|
data/lib/cbeta.rb
CHANGED
@@ -32,6 +32,16 @@ class CBETA
|
|
32
32
|
vol.sub(/^(#{CANON}).*$/, '\1')
|
33
33
|
end
|
34
34
|
|
35
|
+
# @param file_basename[String] XML檔主檔名, 例如 "T01n0001" 或 "T25n1510a"
|
36
|
+
# @param lb[String] 例如 "0001a01" 或 "0757b29"
|
37
|
+
# @return [String] CBETA 行首資訊,例如 "T01n0001_p0001a01" 或 "T25n1510ap0757b29"
|
38
|
+
def self.get_linehead(file_basename, lb)
|
39
|
+
r = file_basename
|
40
|
+
r += '_' if file_basename.match(/\d$/)
|
41
|
+
r += 'p' + lb
|
42
|
+
r
|
43
|
+
end
|
44
|
+
|
35
45
|
# 由 冊號 及 典籍編號 取得 XML 主檔名
|
36
46
|
# @param vol[String] 冊號, 例如 "T01" 或 "GA009"
|
37
47
|
# @param work[String] 典籍編號, 例如 "T0001" 或 "GA0008"
|
data/lib/cbeta/p5a_to_html.rb
CHANGED
@@ -72,7 +72,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
72
72
|
|
73
73
|
def convert_all
|
74
74
|
Dir.entries(@xml_root).sort.each do |c|
|
75
|
-
next unless c.match(/^#{CBETA
|
75
|
+
next unless c.match(/^#{CBETA::CANON}$/)
|
76
76
|
convert_canon(c)
|
77
77
|
end
|
78
78
|
end
|
@@ -139,29 +139,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
139
139
|
end
|
140
140
|
end
|
141
141
|
|
142
|
-
|
143
|
-
def filter_html(html, ed)
|
144
|
-
frag = Nokogiri::HTML.fragment(html)
|
145
|
-
frag.search("r").each do |node|
|
146
|
-
if node['w'].include? ed
|
147
|
-
html_only_this_edition = filter_html(node.inner_html, ed)
|
148
|
-
node.add_previous_sibling html_only_this_edition
|
149
|
-
end
|
150
|
-
node.remove
|
151
|
-
end
|
152
|
-
frag.to_html
|
153
|
-
end
|
154
|
-
|
155
|
-
def get_editions(doc)
|
156
|
-
r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
|
157
|
-
doc.xpath('//lem|//rdg').each do |e|
|
158
|
-
w = e['wit'].scan(/【.*?】/)
|
159
|
-
r.merge w
|
160
|
-
end
|
161
|
-
r
|
162
|
-
end
|
163
|
-
|
164
|
-
def handle_anchor(e)
|
142
|
+
def e_anchor(e)
|
165
143
|
id = e['id']
|
166
144
|
if e.has_attribute?('id')
|
167
145
|
if id.start_with?('nkr_note_orig')
|
@@ -184,7 +162,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
184
162
|
''
|
185
163
|
end
|
186
164
|
|
187
|
-
def handle_app(e)
|
165
|
+
def e_app(e)
|
188
166
|
r = ''
|
189
167
|
if e['type'] == 'star'
|
190
168
|
c = e['corresp'][1..-1]
|
@@ -193,14 +171,14 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
193
171
|
r + traverse(e)
|
194
172
|
end
|
195
173
|
|
196
|
-
def handle_byline(e)
|
174
|
+
def e_byline(e)
|
197
175
|
r = '<p class="byline">'
|
198
176
|
r += line_info
|
199
177
|
r += traverse(e)
|
200
178
|
r + '</p>'
|
201
179
|
end
|
202
180
|
|
203
|
-
def handle_cell(e)
|
181
|
+
def e_cell(e)
|
204
182
|
doc = Nokogiri::XML::Document.new
|
205
183
|
cell = doc.create_element('div')
|
206
184
|
cell['class'] = 'bip-table-cell'
|
@@ -210,7 +188,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
210
188
|
to_html(cell)
|
211
189
|
end
|
212
190
|
|
213
|
-
def handle_corr(e)
|
191
|
+
def e_corr(e)
|
214
192
|
r = ''
|
215
193
|
if e.parent.name == 'choice'
|
216
194
|
sic = e.parent.at_xpath('sic')
|
@@ -231,7 +209,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
231
209
|
r + "<r w='【CBETA】' l='#{@lb}'><span class='cbeta'>%s</span></r>" % traverse(e)
|
232
210
|
end
|
233
211
|
|
234
|
-
def handle_div(e)
|
212
|
+
def e_div(e)
|
235
213
|
@div_count += 1
|
236
214
|
n = @div_count
|
237
215
|
if e.has_attribute? 'type'
|
@@ -244,11 +222,11 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
244
222
|
end
|
245
223
|
end
|
246
224
|
|
247
|
-
def handle_figure(e)
|
225
|
+
def e_figure(e)
|
248
226
|
"<p class='figure'>%s</p>" % traverse(e)
|
249
227
|
end
|
250
228
|
|
251
|
-
def handle_g(e, mode)
|
229
|
+
def e_g(e, mode)
|
252
230
|
# if 有 <mapping type="unicode">
|
253
231
|
# if 不在 Unicode Extension C, D, E 範圍裡
|
254
232
|
# 直接採用
|
@@ -326,12 +304,12 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
326
304
|
"<a class='gaijiAnchor' href='##{gid}'>#{default}</a>"
|
327
305
|
end
|
328
306
|
|
329
|
-
def handle_graphic(e)
|
307
|
+
def e_graphic(e)
|
330
308
|
url = File.basename(e['url'])
|
331
309
|
"<span imgsrc='#{url}' class='graphic'></span>"
|
332
310
|
end
|
333
311
|
|
334
|
-
def handle_head(e)
|
312
|
+
def e_head(e)
|
335
313
|
r = ''
|
336
314
|
unless e['type'] == 'added'
|
337
315
|
i = @open_divs.size
|
@@ -340,15 +318,15 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
340
318
|
r
|
341
319
|
end
|
342
320
|
|
343
|
-
def handle_item(e)
|
321
|
+
def e_item(e)
|
344
322
|
"<li>%s</li>\n" % traverse(e)
|
345
323
|
end
|
346
324
|
|
347
|
-
def handle_juan(e)
|
325
|
+
def e_juan(e)
|
348
326
|
"<p class='juan'>%s</p>" % traverse(e)
|
349
327
|
end
|
350
328
|
|
351
|
-
def handle_l(e)
|
329
|
+
def e_l(e)
|
352
330
|
if @lg_type == 'abnormal'
|
353
331
|
return traverse(e)
|
354
332
|
end
|
@@ -380,13 +358,13 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
380
358
|
r
|
381
359
|
end
|
382
360
|
|
383
|
-
def handle_lb(e)
|
361
|
+
def e_lb(e)
|
384
362
|
# 卍續藏有 X 跟 R 兩種 lb, 只處理 X
|
385
363
|
return '' if e['ed'] != @series
|
386
364
|
|
387
365
|
@char_count = 1
|
388
366
|
@lb = e['n']
|
389
|
-
line_head = @sutra_no
|
367
|
+
line_head = CBETA.get_linehead(@sutra_no, e['n'])
|
390
368
|
r = ''
|
391
369
|
#if e.parent.name == 'lg' and $lg_row_open
|
392
370
|
if @lg_row_open && !@in_l
|
@@ -408,7 +386,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
408
386
|
r
|
409
387
|
end
|
410
388
|
|
411
|
-
def handle_lem(e)
|
389
|
+
def e_lem(e)
|
412
390
|
r = ''
|
413
391
|
content = traverse(e)
|
414
392
|
wit = e['wit']
|
@@ -436,7 +414,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
436
414
|
r + ("<r w='#{w}' l='#{@lb}'>%s</r>" % content)
|
437
415
|
end
|
438
416
|
|
439
|
-
def handle_lg(e)
|
417
|
+
def e_lg(e)
|
440
418
|
r = ''
|
441
419
|
@lg_type = e['type']
|
442
420
|
if @lg_type == 'abnormal'
|
@@ -461,11 +439,11 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
461
439
|
r
|
462
440
|
end
|
463
441
|
|
464
|
-
def handle_list(e)
|
442
|
+
def e_list(e)
|
465
443
|
"<ul>%s</ul>" % traverse(e)
|
466
444
|
end
|
467
445
|
|
468
|
-
def handle_milestone(e)
|
446
|
+
def e_milestone(e)
|
469
447
|
r = ''
|
470
448
|
if e['unit'] == 'juan'
|
471
449
|
|
@@ -484,7 +462,7 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
484
462
|
r
|
485
463
|
end
|
486
464
|
|
487
|
-
def handle_mulu(e)
|
465
|
+
def e_mulu(e)
|
488
466
|
r = ''
|
489
467
|
if e['type'] == '品'
|
490
468
|
@pass << false
|
@@ -493,49 +471,9 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
493
471
|
end
|
494
472
|
r
|
495
473
|
end
|
474
|
+
|
496
475
|
|
497
|
-
def handle_node(e, mode)
|
498
|
-
return '' if e.comment?
|
499
|
-
return handle_text(e, mode) if e.text?
|
500
|
-
return '' if PASS.include?(e.name)
|
501
|
-
r = case e.name
|
502
|
-
when 'anchor' then handle_anchor(e)
|
503
|
-
when 'app' then handle_app(e)
|
504
|
-
when 'byline' then handle_byline(e)
|
505
|
-
when 'cell' then handle_cell(e)
|
506
|
-
when 'corr' then handle_corr(e)
|
507
|
-
when 'div' then handle_div(e)
|
508
|
-
when 'figure' then handle_figure(e)
|
509
|
-
when 'foreign' then ''
|
510
|
-
when 'g' then handle_g(e, mode)
|
511
|
-
when 'graphic' then handle_graphic(e)
|
512
|
-
when 'head' then handle_head(e)
|
513
|
-
when 'item' then handle_item(e)
|
514
|
-
when 'juan' then handle_juan(e)
|
515
|
-
when 'l' then handle_l(e)
|
516
|
-
when 'lb' then handle_lb(e)
|
517
|
-
when 'lem' then handle_lem(e)
|
518
|
-
when 'lg' then handle_lg(e)
|
519
|
-
when 'list' then handle_list(e)
|
520
|
-
when 'mulu' then handle_mulu(e)
|
521
|
-
when 'note' then handle_note(e)
|
522
|
-
when 'milestone' then handle_milestone(e)
|
523
|
-
when 'p' then handle_p(e)
|
524
|
-
when 'rdg' then handle_rdg(e)
|
525
|
-
when 'reg' then ''
|
526
|
-
when 'row' then handle_row(e)
|
527
|
-
when 'sic' then handle_sic(e)
|
528
|
-
when 'sg' then handle_sg(e)
|
529
|
-
when 't' then handle_t(e)
|
530
|
-
when 'tt' then handle_tt(e)
|
531
|
-
when 'table' then handle_table(e)
|
532
|
-
when 'unclear' then handle_unclear(e)
|
533
|
-
else traverse(e)
|
534
|
-
end
|
535
|
-
r
|
536
|
-
end
|
537
|
-
|
538
|
-
def handle_note(e)
|
476
|
+
def e_note(e)
|
539
477
|
n = e['n']
|
540
478
|
if e.has_attribute?('type')
|
541
479
|
t = e['type']
|
@@ -580,36 +518,8 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
580
518
|
end
|
581
519
|
end
|
582
520
|
|
583
|
-
def handle_note_orig(e, anchor_type=nil)
|
584
|
-
n = e['n']
|
585
|
-
@pass << false
|
586
|
-
s = traverse(e)
|
587
|
-
@pass.pop
|
588
|
-
@notes_orig[@juan][n] = s
|
589
|
-
@notes_mod[@juan][n] = s
|
590
|
-
|
591
|
-
c = @series
|
592
|
-
|
593
|
-
# 如果 CBETA 沒有修訂,就跟底本的註一樣
|
594
|
-
# 但是 CBETA 修訂後的編號,有時會加上 a, b
|
595
|
-
# T01n0026, p. 506b07, 大正藏校勘 0506007, CBETA 拆為 0506007a, 0506007b
|
596
|
-
c += " cb" unless @mod_notes.include?(n) or @mod_notes.include?(n+'a')
|
597
521
|
|
598
|
-
|
599
|
-
when 'biao' then " data-label='標#{n[-2..-1]}'"
|
600
|
-
when 'ke' then " data-label='科#{n[-2..-1]}'"
|
601
|
-
else ''
|
602
|
-
end
|
603
|
-
s = "<a class='noteAnchor #{c}' href='#n#{n}'#{label}></a>"
|
604
|
-
r = "<r w='#{@orig}'>#{s}</r>"
|
605
|
-
|
606
|
-
unless @mod_notes.include?(n)
|
607
|
-
r += "<r w='【CBETA】'>#{s}</r>"
|
608
|
-
end
|
609
|
-
r
|
610
|
-
end
|
611
|
-
|
612
|
-
def handle_p(e)
|
522
|
+
def e_p(e)
|
613
523
|
if e.key? 'type'
|
614
524
|
r = "<p class='%s'>" % e['type']
|
615
525
|
else
|
@@ -620,24 +530,24 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
620
530
|
r + '</p>'
|
621
531
|
end
|
622
532
|
|
623
|
-
def handle_rdg(e)
|
533
|
+
def e_rdg(e)
|
624
534
|
r = traverse(e)
|
625
535
|
"<r w='#{e['wit']}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
|
626
536
|
end
|
627
537
|
|
628
|
-
def handle_row(e)
|
538
|
+
def e_row(e)
|
629
539
|
"<div class='bip-table-row'>" + traverse(e) + "</div>"
|
630
540
|
end
|
631
541
|
|
632
|
-
def handle_sg(e)
|
542
|
+
def e_sg(e)
|
633
543
|
'(' + traverse(e) + ')'
|
634
544
|
end
|
635
545
|
|
636
|
-
def handle_sic(e)
|
546
|
+
def e_sic(e)
|
637
547
|
"<r w='#{@orig}' l='#{@lb}'>" + traverse(e) + "</r>"
|
638
548
|
end
|
639
549
|
|
640
|
-
def handle_t(e)
|
550
|
+
def e_t(e)
|
641
551
|
if e.has_attribute? 'place'
|
642
552
|
return '' if e['place'].include? 'foot'
|
643
553
|
end
|
@@ -659,15 +569,112 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
659
569
|
end
|
660
570
|
end
|
661
571
|
|
662
|
-
def handle_tt(e)
|
572
|
+
def e_tt(e)
|
663
573
|
@tt_type = e['type']
|
664
574
|
traverse(e)
|
665
575
|
end
|
666
576
|
|
667
|
-
def handle_table(e)
|
577
|
+
def e_table(e)
|
668
578
|
"<div class='bip-table'>" + traverse(e) + "</div>"
|
669
579
|
end
|
580
|
+
|
581
|
+
def e_unclear(e)
|
582
|
+
'▆'
|
583
|
+
end
|
584
|
+
|
585
|
+
def filter_html(html, ed)
|
586
|
+
progress "filter html ed: #{ed}"
|
587
|
+
frag = Nokogiri::HTML.fragment(html)
|
588
|
+
frag.search("r").each do |node|
|
589
|
+
if node['w'].include? ed
|
590
|
+
html_only_this_edition = filter_html(node.inner_html, ed)
|
591
|
+
node.add_previous_sibling html_only_this_edition
|
592
|
+
end
|
593
|
+
node.remove
|
594
|
+
end
|
595
|
+
frag.to_html
|
596
|
+
end
|
597
|
+
|
598
|
+
def get_editions(doc)
|
599
|
+
r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
|
600
|
+
doc.xpath('//lem|//rdg').each do |e|
|
601
|
+
w = e['wit'].scan(/【.*?】/)
|
602
|
+
r.merge w
|
603
|
+
end
|
604
|
+
r
|
605
|
+
end
|
670
606
|
|
607
|
+
|
608
|
+
def handle_node(e, mode)
|
609
|
+
return '' if e.comment?
|
610
|
+
return handle_text(e, mode) if e.text?
|
611
|
+
return '' if PASS.include?(e.name)
|
612
|
+
r = case e.name
|
613
|
+
when 'anchor' then e_anchor(e)
|
614
|
+
when 'app' then e_app(e)
|
615
|
+
when 'byline' then e_byline(e)
|
616
|
+
when 'cell' then e_cell(e)
|
617
|
+
when 'corr' then e_corr(e)
|
618
|
+
when 'div' then e_div(e)
|
619
|
+
when 'figure' then e_figure(e)
|
620
|
+
when 'foreign' then ''
|
621
|
+
when 'g' then e_g(e, mode)
|
622
|
+
when 'graphic' then e_graphic(e)
|
623
|
+
when 'head' then e_head(e)
|
624
|
+
when 'item' then e_item(e)
|
625
|
+
when 'juan' then e_juan(e)
|
626
|
+
when 'l' then e_l(e)
|
627
|
+
when 'lb' then e_lb(e)
|
628
|
+
when 'lem' then e_lem(e)
|
629
|
+
when 'lg' then e_lg(e)
|
630
|
+
when 'list' then e_list(e)
|
631
|
+
when 'mulu' then e_mulu(e)
|
632
|
+
when 'note' then e_note(e)
|
633
|
+
when 'milestone' then e_milestone(e)
|
634
|
+
when 'p' then e_p(e)
|
635
|
+
when 'rdg' then e_rdg(e)
|
636
|
+
when 'reg' then ''
|
637
|
+
when 'row' then e_row(e)
|
638
|
+
when 'sic' then e_sic(e)
|
639
|
+
when 'sg' then e_sg(e)
|
640
|
+
when 't' then e_t(e)
|
641
|
+
when 'tt' then e_tt(e)
|
642
|
+
when 'table' then e_table(e)
|
643
|
+
when 'unclear' then e_unclear(e)
|
644
|
+
else traverse(e)
|
645
|
+
end
|
646
|
+
r
|
647
|
+
end
|
648
|
+
|
649
|
+
def handle_note_orig(e, anchor_type=nil)
|
650
|
+
n = e['n']
|
651
|
+
@pass << false
|
652
|
+
s = traverse(e)
|
653
|
+
@pass.pop
|
654
|
+
@notes_orig[@juan][n] = s
|
655
|
+
@notes_mod[@juan][n] = s
|
656
|
+
|
657
|
+
c = @series
|
658
|
+
|
659
|
+
# 如果 CBETA 沒有修訂,就跟底本的註一樣
|
660
|
+
# 但是 CBETA 修訂後的編號,有時會加上 a, b
|
661
|
+
# T01n0026, p. 506b07, 大正藏校勘 0506007, CBETA 拆為 0506007a, 0506007b
|
662
|
+
c += " cb" unless @mod_notes.include?(n) or @mod_notes.include?(n+'a')
|
663
|
+
|
664
|
+
label = case anchor_type
|
665
|
+
when 'biao' then " data-label='標#{n[-2..-1]}'"
|
666
|
+
when 'ke' then " data-label='科#{n[-2..-1]}'"
|
667
|
+
else ''
|
668
|
+
end
|
669
|
+
s = "<a class='noteAnchor #{c}' href='#n#{n}'#{label}></a>"
|
670
|
+
r = "<r w='#{@orig}'>#{s}</r>"
|
671
|
+
|
672
|
+
unless @mod_notes.include?(n)
|
673
|
+
r += "<r w='【CBETA】'>#{s}</r>"
|
674
|
+
end
|
675
|
+
r
|
676
|
+
end
|
677
|
+
|
671
678
|
def handle_text(e, mode)
|
672
679
|
s = e.content().chomp
|
673
680
|
return '' if s.empty?
|
@@ -689,11 +696,8 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
689
696
|
r
|
690
697
|
end
|
691
698
|
|
692
|
-
def handle_unclear(e)
|
693
|
-
'▆'
|
694
|
-
end
|
695
|
-
|
696
699
|
def html_back(juan_no, ed)
|
700
|
+
progress "html back, juan: #{juan_no}, ed: #{ed}"
|
697
701
|
r = ''
|
698
702
|
case ed
|
699
703
|
when '【CBETA】'
|
@@ -843,6 +847,11 @@ class CBETA::P5aToHTMLForEveryEdition
|
|
843
847
|
text = traverse(body)
|
844
848
|
text
|
845
849
|
end
|
850
|
+
|
851
|
+
def progress(msg)
|
852
|
+
puts Time.now.strftime("%Y-%m-%d %H:%M:%S")
|
853
|
+
puts msg
|
854
|
+
end
|
846
855
|
|
847
856
|
def to_html(e)
|
848
857
|
e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.22
|
4
|
+
version: 2.2.26
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-12-
|
11
|
+
date: 2016-12-12 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
|
14
14
|
email: zhoubx@gmail.com
|