cbeta 3.6.16 → 3.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/cbeta/p5a_checker.rb +22 -18
- data/lib/cbeta.rb +31 -14
- data.tar.gz.sig +0 -0
- metadata +31 -4
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 02f6421948b47e5d049f7e5a695b5c7b4b73519401e6bd13a24ba4e96b685eb3
|
|
4
|
+
data.tar.gz: 8f8bcd8c63d9dbc3b54aa9dfdd841b95b8819776c2a185ff04dee106ee3f538a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: dbb2320988bcccc49f6daf616a78ff7e64bffcb94fe989d6c68955952dce891bf79ec580e7911a0099b343c0aaf675b5f6cde189dc344a734a5848b46a4101d5
|
|
7
|
+
data.tar.gz: 6dd81d74d311de2e7339fa045a9a2e78b94710a47e2eb9e9ebe0108611401ce51c02d1dabe27b31fd0c6488ed0e2d3527311df0de5ceff64fba479ad7cb65c24
|
checksums.yaml.gz.sig
ADDED
|
Binary file
|
data/lib/cbeta/p5a_checker.rb
CHANGED
|
@@ -17,6 +17,7 @@ require_relative 'cbeta_share'
|
|
|
17
17
|
# * [E12] note 直接出現在 lg 下
|
|
18
18
|
# * [E13] tt 直接出現在 lg 下
|
|
19
19
|
# * [E14] <anchor type="right"> 不應直接出現在 div 或 body 下
|
|
20
|
+
# * [E15] <note> corresp 無對應的 <note>
|
|
20
21
|
#
|
|
21
22
|
# * 警告類型
|
|
22
23
|
# * [W01] 夾注包夾注
|
|
@@ -33,7 +34,7 @@ class CBETA::P5aChecker
|
|
|
33
34
|
@xml_root = xml_root
|
|
34
35
|
@figures = figures
|
|
35
36
|
@log = log
|
|
36
|
-
@errors =
|
|
37
|
+
@errors = []
|
|
37
38
|
@g_errors = {}
|
|
38
39
|
end
|
|
39
40
|
|
|
@@ -118,17 +119,17 @@ class CBETA::P5aChecker
|
|
|
118
119
|
def display_errors
|
|
119
120
|
@g_errors.keys.sort.each do |k|
|
|
120
121
|
s = @g_errors[k].to_a.join(',')
|
|
121
|
-
@errors << "#{k} 無缺字資料,出現於:#{s}
|
|
122
|
+
@errors << "#{k} 無缺字資料,出現於:#{s}"
|
|
122
123
|
end
|
|
123
124
|
|
|
124
125
|
if @errors.empty?
|
|
125
126
|
puts "檢查完成,未發現錯誤。"
|
|
126
127
|
elsif @log.nil?
|
|
127
|
-
puts "
|
|
128
|
-
puts @errors
|
|
128
|
+
puts "發現 #{@errors.size} 錯誤:"
|
|
129
|
+
puts @errors.join("\n")
|
|
129
130
|
else
|
|
130
|
-
File.write(@log, @errors)
|
|
131
|
-
puts "
|
|
131
|
+
File.write(@log, @errors.join("\n"))
|
|
132
|
+
puts "發現 #{@errors.size} 錯誤,請查看 #{@log}"
|
|
132
133
|
end
|
|
133
134
|
end
|
|
134
135
|
|
|
@@ -197,13 +198,9 @@ class CBETA::P5aChecker
|
|
|
197
198
|
end
|
|
198
199
|
|
|
199
200
|
def e_note(e)
|
|
200
|
-
if e.parent.name == 'div'
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
if e.parent.name == 'lg'
|
|
205
|
-
error "[E12] note 直接出現在 lg 下"
|
|
206
|
-
end
|
|
201
|
+
error "[E11] note 直接出現在 div 下" if e.parent.name == 'div'
|
|
202
|
+
error "[E12] note 直接出現在 lg 下" if e.parent.name == 'lg'
|
|
203
|
+
e_note_corresp(e) if e.key?('corresp')
|
|
207
204
|
|
|
208
205
|
unless e['place'] == 'inline'
|
|
209
206
|
traverse(e)
|
|
@@ -213,11 +210,18 @@ class CBETA::P5aChecker
|
|
|
213
210
|
if @element_stack.include?('inline_note')
|
|
214
211
|
error "[W01] 夾注包夾注"
|
|
215
212
|
end
|
|
213
|
+
|
|
216
214
|
@element_stack << 'inline_note'
|
|
217
215
|
traverse(e)
|
|
218
216
|
@element_stack.pop
|
|
219
217
|
end
|
|
220
218
|
|
|
219
|
+
def e_note_corresp(e)
|
|
220
|
+
n = e['corresp'].delete_prefix('#')
|
|
221
|
+
return if @notes.include?(n)
|
|
222
|
+
error "[E15] note corresp #{n} 無對應 note"
|
|
223
|
+
end
|
|
224
|
+
|
|
221
225
|
def e_p(e)
|
|
222
226
|
if e.parent.name == 'list'
|
|
223
227
|
error "[E10] p 不應直接出現在 list 下"
|
|
@@ -261,8 +265,8 @@ class CBETA::P5aChecker
|
|
|
261
265
|
end
|
|
262
266
|
|
|
263
267
|
def error(msg)
|
|
264
|
-
s = "#{msg}, #{@basename}, lb: #{@lb}
|
|
265
|
-
|
|
268
|
+
s = "#{msg}, #{@basename}, lb: #{@lb}"
|
|
269
|
+
puts "\n#{s}"
|
|
266
270
|
@errors << s
|
|
267
271
|
end
|
|
268
272
|
|
|
@@ -281,7 +285,7 @@ class CBETA::P5aChecker
|
|
|
281
285
|
|
|
282
286
|
s = File.read(fn)
|
|
283
287
|
if s.include? "\u200B"
|
|
284
|
-
@errors << "#{@basename} 含有 U+200B Zero Width Space
|
|
288
|
+
@errors << "#{@basename} 含有 U+200B Zero Width Space 字元"
|
|
285
289
|
end
|
|
286
290
|
|
|
287
291
|
doc = Nokogiri::XML(s)
|
|
@@ -292,7 +296,7 @@ class CBETA::P5aChecker
|
|
|
292
296
|
@element_stack = []
|
|
293
297
|
traverse(doc.root)
|
|
294
298
|
else
|
|
295
|
-
@errors << "錯誤: #{@basename} not well-formed
|
|
299
|
+
@errors << "錯誤: #{@basename} not well-formed"
|
|
296
300
|
end
|
|
297
301
|
end
|
|
298
302
|
|
|
@@ -315,7 +319,7 @@ class CBETA::P5aChecker
|
|
|
315
319
|
end
|
|
316
320
|
|
|
317
321
|
def handle_vol(folder)
|
|
318
|
-
|
|
322
|
+
print "\rcheck vol: #{File.basename(folder)} "
|
|
319
323
|
Dir.entries(folder).sort.each do |f|
|
|
320
324
|
next if f.start_with? '.'
|
|
321
325
|
path = File.join(folder, f)
|
data/lib/cbeta.rb
CHANGED
|
@@ -12,6 +12,16 @@ class CBETA
|
|
|
12
12
|
DATA = File.join(File.dirname(__FILE__), 'data')
|
|
13
13
|
PUNCS = ',.()[] 。‧.,、;?!:︰/()「」『』《》<>〈〉〔〕[]【】〖〗〃…—─ ~│┬▆△*+-='
|
|
14
14
|
|
|
15
|
+
# 經號 (不含 Canon ID):
|
|
16
|
+
# 四碼數字 T0001
|
|
17
|
+
# 四碼數字 + 英文字母 T0150A, T0128a
|
|
18
|
+
# 英文字母 + 三碼數字 JA041, ZWa073
|
|
19
|
+
WORK_PART = '\d{4}[a-zA-Z]?|[ABa]\d{3}'
|
|
20
|
+
|
|
21
|
+
# XML file 主檔名
|
|
22
|
+
# GA010n0009
|
|
23
|
+
BASENAME = "(?:#{CANON})\\d{2,3}n(?:#{WORK_PART})"
|
|
24
|
+
|
|
15
25
|
# 由 行首資訊 取得 藏經 ID
|
|
16
26
|
# @param linehead[String] 行首資訊, 例如 "T01n0001_p0001a01" 或 "GA009n0008_p0003a01"
|
|
17
27
|
# @return [String] 藏經 ID,例如 "T" 或 "GA"
|
|
@@ -37,6 +47,8 @@ class CBETA
|
|
|
37
47
|
# @param lb[String] 例如 "0001a01" 或 "0757b29"
|
|
38
48
|
# @return [String] CBETA 行首資訊,例如 "T01n0001_p0001a01" 或 "T25n1510ap0757b29"
|
|
39
49
|
def self.get_linehead(file_basename, lb)
|
|
50
|
+
return nil if file_basename.nil?
|
|
51
|
+
|
|
40
52
|
if file_basename.match(/^(T\d\dn0220)/)
|
|
41
53
|
r = $1
|
|
42
54
|
else
|
|
@@ -46,6 +58,22 @@ class CBETA
|
|
|
46
58
|
r += 'p' + lb
|
|
47
59
|
r
|
|
48
60
|
end
|
|
61
|
+
|
|
62
|
+
# 由 XML檔主檔名 取得 典籍編號
|
|
63
|
+
# @param fn[String] 檔名, 例如 "T01n0001" 或 "GA009n0008"
|
|
64
|
+
# @return [String] 典籍編號,例如 "T0001" 或 "GA0008"
|
|
65
|
+
def self.get_work_id_from_file_basename(fn)
|
|
66
|
+
r = fn.sub(/^(#{CANON})\d{2,3}n(.*)$/, '\1\2')
|
|
67
|
+
r = 'T0220' if r.start_with? 'T0220'
|
|
68
|
+
r
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# 由 行首資訊 取得 典籍編號
|
|
72
|
+
# @param linehead[String] CBETA 行首資訊,例如 "T01n0001_p0001a01" 或 "T25n1510ap0757b29"
|
|
73
|
+
# @return [String] 典籍編號,例如 "T0001" 或 "T1510a"
|
|
74
|
+
def self.get_work_id_from_linehead(linehead)
|
|
75
|
+
linehead.sub(/^(#{CANON})\d{2,3}n(#{WORK_PART}).*$/, '\1\2')
|
|
76
|
+
end
|
|
49
77
|
|
|
50
78
|
# 由 冊號 及 典籍編號 取得 XML 主檔名
|
|
51
79
|
# @param vol[String] 冊號, 例如 "T01" 或 "GA009"
|
|
@@ -114,24 +142,13 @@ class CBETA
|
|
|
114
142
|
# ex: J36nB348_p0284c01
|
|
115
143
|
# @return [String] XML檔相對路徑,例如 "GA/GA009/GA009n0008.xml"
|
|
116
144
|
def self.linehead_to_xml_file_path(linehead)
|
|
117
|
-
|
|
118
|
-
w = '(?:\d+[a-zA-Z]?|[AB]\d{3})'
|
|
119
|
-
if m = linehead.match(/^(?<work>(?<vol>(?<canon>#{CANON})\d+)n#{w}).*$/)
|
|
145
|
+
if m = linehead.match(/^(?<work>(?<vol>(?<canon>#{CANON})\d+)n(?:#{WORK_PART})).*$/)
|
|
120
146
|
File.join(m[:canon], m[:vol], m[:work]+'.xml')
|
|
121
147
|
else
|
|
122
148
|
nil
|
|
123
149
|
end
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
# 由 XML檔主檔名 取得 典籍編號
|
|
127
|
-
# @param fn[String] 檔名, 例如 "T01n0001" 或 "GA009n0008"
|
|
128
|
-
# @return [String] 典籍編號,例如 "T0001" 或 "GA0008"
|
|
129
|
-
def self.get_work_id_from_file_basename(fn)
|
|
130
|
-
r = fn.sub(/^(#{CANON})\d{2,3}n(.*)$/, '\1\2')
|
|
131
|
-
r = 'T0220' if r.start_with? 'T0220'
|
|
132
|
-
r
|
|
133
|
-
end
|
|
134
|
-
|
|
150
|
+
end
|
|
151
|
+
|
|
135
152
|
# 由「藏經 ID」取得「排序用編號」,例如:傳入 "T" 回傳 "A";傳入 "X" 回傳 "B"
|
|
136
153
|
# @param canon [String] 藏經 ID
|
|
137
154
|
# @return [String] 排序用編號
|
data.tar.gz.sig
ADDED
|
Binary file
|
metadata
CHANGED
|
@@ -1,13 +1,40 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cbeta
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.
|
|
4
|
+
version: 3.7.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ray Chou
|
|
8
8
|
bindir: bin
|
|
9
|
-
cert_chain:
|
|
10
|
-
|
|
9
|
+
cert_chain:
|
|
10
|
+
- |
|
|
11
|
+
-----BEGIN CERTIFICATE-----
|
|
12
|
+
MIIEaDCCAtCgAwIBAgIBATANBgkqhkiG9w0BAQsFADA9MQ8wDQYDVQQDDAZ6aG91
|
|
13
|
+
YngxFTATBgoJkiaJk/IsZAEZFgVnbWFpbDETMBEGCgmSJomT8ixkARkWA2NvbTAe
|
|
14
|
+
Fw0yNTEwMTMwNjM4MjdaFw0yNjEwMTMwNjM4MjdaMD0xDzANBgNVBAMMBnpob3Vi
|
|
15
|
+
eDEVMBMGCgmSJomT8ixkARkWBWdtYWlsMRMwEQYKCZImiZPyLGQBGRYDY29tMIIB
|
|
16
|
+
ojANBgkqhkiG9w0BAQEFAAOCAY8AMIIBigKCAYEAiyVQ5WW6oy4b8drYYebSMVi+
|
|
17
|
+
EC1xHNgAuTmlKF0w/Z2mRwL7rTOt/d3zUpTGRwBiqz3JF4ZAJksei6y7UzbA4qDb
|
|
18
|
+
WZDv+s6y6SZfeB+1JLDQOGJ+XxiM6ezsgMyZIeAOXciELTJh/x/R6pRatVDPyOAB
|
|
19
|
+
E1uv/2qGTP8Vy76fH0N9E5WU7xccPWR9uFlPKcpiAS1werNVS8/UCSyeS90o5ckE
|
|
20
|
+
rp5jOTpy7Dtzlz82qMDFGFaLh1Zo7SYeNEJlroZYec9vdnYUWmyT52l12IH6uFll
|
|
21
|
+
DvnhRkwVIdQ9l2as1PQx9eg69Jf9qSCAuOXMVojNkwWLsezyjB5CcidL8wx8Sifg
|
|
22
|
+
mKWoeNrWYbwH1h9B1dzX2jIwqcr8BNI2usK42YWyxiJGbQr+SBHhXEt25s3AVH+P
|
|
23
|
+
YUby2af2Gcb5ltpqqdXFoO8PpDODWtUcHM3NTL2uGbKvq1Ai0GhPJNHPco2SW9DA
|
|
24
|
+
AGQFSEgJj7BlHDHcYpOXfsuQxnKXeeCw6dVmyYfPAgMBAAGjczBxMAkGA1UdEwQC
|
|
25
|
+
MAAwCwYDVR0PBAQDAgSwMB0GA1UdDgQWBBTqBx17iPvliZ0RKL1RZluB50Uc/jAb
|
|
26
|
+
BgNVHREEFDASgRB6aG91YnhAZ21haWwuY29tMBsGA1UdEgQUMBKBEHpob3VieEBn
|
|
27
|
+
bWFpbC5jb20wDQYJKoZIhvcNAQELBQADggGBAF+XmQPhEcuiRl/Zb+1vIBnR8Z8Q
|
|
28
|
+
mTJm4inGy8boLAz71Pg04yI+moj7A0CSS/xbv1MtZqOzTxGFm16xLczY4k6BuRof
|
|
29
|
+
C9usw3Z7val3rHRzmdhLqkX4ALrLzd/G0uirARIQ08N5r0ETIkNFCdePtRHH8uk3
|
|
30
|
+
Er0v4v4QT9ATyrAs4iImWlhmWZIEXOdhBqW01c9/cm5Gcg/WBjdEplpJtGGnMkdc
|
|
31
|
+
zQQscleWWbCX7QKjR9rSCPNteUuzAoS8gNCbLNQF88aOFzm704dePli3WNL/GTvy
|
|
32
|
+
frO5NqUXpwCOzgCbMKfxYBw87WDA8yAE2Cn3Kw1bvX/fheUNr++QyVY8sqVDOlUC
|
|
33
|
+
TovfZCtNma/D0DD8q8enl7UEwKWEs3IC+8bu6jlzYn/6BcLTkKwzB8NXSLVGkFva
|
|
34
|
+
xyQse8slbARanDyyT6HCYtlaKD9WYw647tkJhyY2GniloCNcOJS0URvhlulLzJYg
|
|
35
|
+
9CrAxVAO2NMeM6tbxg1VjYbqopjoLCwLdAJouA==
|
|
36
|
+
-----END CERTIFICATE-----
|
|
37
|
+
date: 2025-11-12 00:00:00.000000000 Z
|
|
11
38
|
dependencies:
|
|
12
39
|
- !ruby/object:Gem::Dependency
|
|
13
40
|
name: unihan2
|
|
@@ -118,7 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
118
145
|
- !ruby/object:Gem::Version
|
|
119
146
|
version: '0'
|
|
120
147
|
requirements: []
|
|
121
|
-
rubygems_version: 3.
|
|
148
|
+
rubygems_version: 3.7.2
|
|
122
149
|
specification_version: 4
|
|
123
150
|
summary: CBETA Tools
|
|
124
151
|
test_files: []
|
metadata.gz.sig
ADDED
|
Binary file
|