cbeta 3.6.16 → 3.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 54335ea806bf1c208341687f6028b43919452a9227cc72702dc2e3c39bcc0930
4
- data.tar.gz: de5b6004c9313c55b8cc4b0bf9bbf04a8dffdadf2dcf12a850b26e12fe0e6132
3
+ metadata.gz: 02f6421948b47e5d049f7e5a695b5c7b4b73519401e6bd13a24ba4e96b685eb3
4
+ data.tar.gz: 8f8bcd8c63d9dbc3b54aa9dfdd841b95b8819776c2a185ff04dee106ee3f538a
5
5
  SHA512:
6
- metadata.gz: 9bd998e632c0e2124d9c068ecb2abbe799a4545c36e242410ccb8ae7ccaf4736724508cbcfc51705427deeb413603f3600c324120798e326ee93f5718158960f
7
- data.tar.gz: 955c2c07ad20786dd38c974ffcec17b0a27391a086f7d6556ec99f493a4f2b9569d47f2b211573776e885d396d45f98ab12dba661bee868fdd8462e07dbf9eae
6
+ metadata.gz: dbb2320988bcccc49f6daf616a78ff7e64bffcb94fe989d6c68955952dce891bf79ec580e7911a0099b343c0aaf675b5f6cde189dc344a734a5848b46a4101d5
7
+ data.tar.gz: 6dd81d74d311de2e7339fa045a9a2e78b94710a47e2eb9e9ebe0108611401ce51c02d1dabe27b31fd0c6488ed0e2d3527311df0de5ceff64fba479ad7cb65c24
checksums.yaml.gz.sig ADDED
Binary file
@@ -17,6 +17,7 @@ require_relative 'cbeta_share'
17
17
  # * [E12] note 直接出現在 lg 下
18
18
  # * [E13] tt 直接出現在 lg 下
19
19
  # * [E14] <anchor type="right"> 不應直接出現在 div 或 body 下
20
+ # * [E15] <note> corresp 無對應的 <note>
20
21
  #
21
22
  # * 警告類型
22
23
  # * [W01] 夾注包夾注
@@ -33,7 +34,7 @@ class CBETA::P5aChecker
33
34
  @xml_root = xml_root
34
35
  @figures = figures
35
36
  @log = log
36
- @errors = ''
37
+ @errors = []
37
38
  @g_errors = {}
38
39
  end
39
40
 
@@ -118,17 +119,17 @@ class CBETA::P5aChecker
118
119
  def display_errors
119
120
  @g_errors.keys.sort.each do |k|
120
121
  s = @g_errors[k].to_a.join(',')
121
- @errors << "#{k} 無缺字資料,出現於:#{s}\n"
122
+ @errors << "#{k} 無缺字資料,出現於:#{s}"
122
123
  end
123
124
 
124
125
  if @errors.empty?
125
126
  puts "檢查完成,未發現錯誤。"
126
127
  elsif @log.nil?
127
- puts "發現錯誤:"
128
- puts @errors
128
+ puts "發現 #{@errors.size} 錯誤:"
129
+ puts @errors.join("\n")
129
130
  else
130
- File.write(@log, @errors)
131
- puts "發現錯誤,請查看 #{@log}"
131
+ File.write(@log, @errors.join("\n"))
132
+ puts "發現 #{@errors.size} 錯誤,請查看 #{@log}"
132
133
  end
133
134
  end
134
135
 
@@ -197,13 +198,9 @@ class CBETA::P5aChecker
197
198
  end
198
199
 
199
200
  def e_note(e)
200
- if e.parent.name == 'div'
201
- error "[E11] note 直接出現在 div 下"
202
- end
203
-
204
- if e.parent.name == 'lg'
205
- error "[E12] note 直接出現在 lg 下"
206
- end
201
+ error "[E11] note 直接出現在 div 下" if e.parent.name == 'div'
202
+ error "[E12] note 直接出現在 lg 下" if e.parent.name == 'lg'
203
+ e_note_corresp(e) if e.key?('corresp')
207
204
 
208
205
  unless e['place'] == 'inline'
209
206
  traverse(e)
@@ -213,11 +210,18 @@ class CBETA::P5aChecker
213
210
  if @element_stack.include?('inline_note')
214
211
  error "[W01] 夾注包夾注"
215
212
  end
213
+
216
214
  @element_stack << 'inline_note'
217
215
  traverse(e)
218
216
  @element_stack.pop
219
217
  end
220
218
 
219
+ def e_note_corresp(e)
220
+ n = e['corresp'].delete_prefix('#')
221
+ return if @notes.include?(n)
222
+ error "[E15] note corresp #{n} 無對應 note"
223
+ end
224
+
221
225
  def e_p(e)
222
226
  if e.parent.name == 'list'
223
227
  error "[E10] p 不應直接出現在 list 下"
@@ -261,8 +265,8 @@ class CBETA::P5aChecker
261
265
  end
262
266
 
263
267
  def error(msg)
264
- s = "#{msg}, #{@basename}, lb: #{@lb}\n"
265
- print s
268
+ s = "#{msg}, #{@basename}, lb: #{@lb}"
269
+ puts "\n#{s}"
266
270
  @errors << s
267
271
  end
268
272
 
@@ -281,7 +285,7 @@ class CBETA::P5aChecker
281
285
 
282
286
  s = File.read(fn)
283
287
  if s.include? "\u200B"
284
- @errors << "#{@basename} 含有 U+200B Zero Width Space 字元\n"
288
+ @errors << "#{@basename} 含有 U+200B Zero Width Space 字元"
285
289
  end
286
290
 
287
291
  doc = Nokogiri::XML(s)
@@ -292,7 +296,7 @@ class CBETA::P5aChecker
292
296
  @element_stack = []
293
297
  traverse(doc.root)
294
298
  else
295
- @errors << "錯誤: #{@basename} not well-formed\n"
299
+ @errors << "錯誤: #{@basename} not well-formed"
296
300
  end
297
301
  end
298
302
 
@@ -315,7 +319,7 @@ class CBETA::P5aChecker
315
319
  end
316
320
 
317
321
  def handle_vol(folder)
318
- puts "check vol: #{File.basename(folder)}"
322
+ print "\rcheck vol: #{File.basename(folder)} "
319
323
  Dir.entries(folder).sort.each do |f|
320
324
  next if f.start_with? '.'
321
325
  path = File.join(folder, f)
data/lib/cbeta.rb CHANGED
@@ -12,6 +12,16 @@ class CBETA
12
12
  DATA = File.join(File.dirname(__FILE__), 'data')
13
13
  PUNCS = ',.()[] 。‧.,、;?!:︰/()「」『』《》<>〈〉〔〕[]【】〖〗〃…—─ ~│┬▆△*+-='
14
14
 
15
+ # 經號 (不含 Canon ID):
16
+ # 四碼數字 T0001
17
+ # 四碼數字 + 英文字母 T0150A, T0128a
18
+ # 英文字母 + 三碼數字 JA041, ZWa073
19
+ WORK_PART = '\d{4}[a-zA-Z]?|[ABa]\d{3}'
20
+
21
+ # XML file 主檔名
22
+ # GA010n0009
23
+ BASENAME = "(?:#{CANON})\\d{2,3}n(?:#{WORK_PART})"
24
+
15
25
  # 由 行首資訊 取得 藏經 ID
16
26
  # @param linehead[String] 行首資訊, 例如 "T01n0001_p0001a01" 或 "GA009n0008_p0003a01"
17
27
  # @return [String] 藏經 ID,例如 "T" 或 "GA"
@@ -37,6 +47,8 @@ class CBETA
37
47
  # @param lb[String] 例如 "0001a01" 或 "0757b29"
38
48
  # @return [String] CBETA 行首資訊,例如 "T01n0001_p0001a01" 或 "T25n1510ap0757b29"
39
49
  def self.get_linehead(file_basename, lb)
50
+ return nil if file_basename.nil?
51
+
40
52
  if file_basename.match(/^(T\d\dn0220)/)
41
53
  r = $1
42
54
  else
@@ -46,6 +58,22 @@ class CBETA
46
58
  r += 'p' + lb
47
59
  r
48
60
  end
61
+
62
+ # 由 XML檔主檔名 取得 典籍編號
63
+ # @param fn[String] 檔名, 例如 "T01n0001" 或 "GA009n0008"
64
+ # @return [String] 典籍編號,例如 "T0001" 或 "GA0008"
65
+ def self.get_work_id_from_file_basename(fn)
66
+ r = fn.sub(/^(#{CANON})\d{2,3}n(.*)$/, '\1\2')
67
+ r = 'T0220' if r.start_with? 'T0220'
68
+ r
69
+ end
70
+
71
+ # 由 行首資訊 取得 典籍編號
72
+ # @param linehead[String] CBETA 行首資訊,例如 "T01n0001_p0001a01" 或 "T25n1510ap0757b29"
73
+ # @return [String] 典籍編號,例如 "T0001" 或 "T1510a"
74
+ def self.get_work_id_from_linehead(linehead)
75
+ linehead.sub(/^(#{CANON})\d{2,3}n(#{WORK_PART}).*$/, '\1\2')
76
+ end
49
77
 
50
78
  # 由 冊號 及 典籍編號 取得 XML 主檔名
51
79
  # @param vol[String] 冊號, 例如 "T01" 或 "GA009"
@@ -114,24 +142,13 @@ class CBETA
114
142
  # ex: J36nB348_p0284c01
115
143
  # @return [String] XML檔相對路徑,例如 "GA/GA009/GA009n0008.xml"
116
144
  def self.linehead_to_xml_file_path(linehead)
117
- # 經號: 四碼數字 + 英文字母 或如 嘉興藏 英文字母 + 三碼數字
118
- w = '(?:\d+[a-zA-Z]?|[AB]\d{3})'
119
- if m = linehead.match(/^(?<work>(?<vol>(?<canon>#{CANON})\d+)n#{w}).*$/)
145
+ if m = linehead.match(/^(?<work>(?<vol>(?<canon>#{CANON})\d+)n(?:#{WORK_PART})).*$/)
120
146
  File.join(m[:canon], m[:vol], m[:work]+'.xml')
121
147
  else
122
148
  nil
123
149
  end
124
- end
125
-
126
- # 由 XML檔主檔名 取得 典籍編號
127
- # @param fn[String] 檔名, 例如 "T01n0001" 或 "GA009n0008"
128
- # @return [String] 典籍編號,例如 "T0001" 或 "GA0008"
129
- def self.get_work_id_from_file_basename(fn)
130
- r = fn.sub(/^(#{CANON})\d{2,3}n(.*)$/, '\1\2')
131
- r = 'T0220' if r.start_with? 'T0220'
132
- r
133
- end
134
-
150
+ end
151
+
135
152
  # 由「藏經 ID」取得「排序用編號」,例如:傳入 "T" 回傳 "A";傳入 "X" 回傳 "B"
136
153
  # @param canon [String] 藏經 ID
137
154
  # @return [String] 排序用編號
data.tar.gz.sig ADDED
Binary file
metadata CHANGED
@@ -1,13 +1,40 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.6.16
4
+ version: 3.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  bindir: bin
9
- cert_chain: []
10
- date: 2025-07-29 00:00:00.000000000 Z
9
+ cert_chain:
10
+ - |
11
+ -----BEGIN CERTIFICATE-----
12
+ MIIEaDCCAtCgAwIBAgIBATANBgkqhkiG9w0BAQsFADA9MQ8wDQYDVQQDDAZ6aG91
13
+ YngxFTATBgoJkiaJk/IsZAEZFgVnbWFpbDETMBEGCgmSJomT8ixkARkWA2NvbTAe
14
+ Fw0yNTEwMTMwNjM4MjdaFw0yNjEwMTMwNjM4MjdaMD0xDzANBgNVBAMMBnpob3Vi
15
+ eDEVMBMGCgmSJomT8ixkARkWBWdtYWlsMRMwEQYKCZImiZPyLGQBGRYDY29tMIIB
16
+ ojANBgkqhkiG9w0BAQEFAAOCAY8AMIIBigKCAYEAiyVQ5WW6oy4b8drYYebSMVi+
17
+ EC1xHNgAuTmlKF0w/Z2mRwL7rTOt/d3zUpTGRwBiqz3JF4ZAJksei6y7UzbA4qDb
18
+ WZDv+s6y6SZfeB+1JLDQOGJ+XxiM6ezsgMyZIeAOXciELTJh/x/R6pRatVDPyOAB
19
+ E1uv/2qGTP8Vy76fH0N9E5WU7xccPWR9uFlPKcpiAS1werNVS8/UCSyeS90o5ckE
20
+ rp5jOTpy7Dtzlz82qMDFGFaLh1Zo7SYeNEJlroZYec9vdnYUWmyT52l12IH6uFll
21
+ DvnhRkwVIdQ9l2as1PQx9eg69Jf9qSCAuOXMVojNkwWLsezyjB5CcidL8wx8Sifg
22
+ mKWoeNrWYbwH1h9B1dzX2jIwqcr8BNI2usK42YWyxiJGbQr+SBHhXEt25s3AVH+P
23
+ YUby2af2Gcb5ltpqqdXFoO8PpDODWtUcHM3NTL2uGbKvq1Ai0GhPJNHPco2SW9DA
24
+ AGQFSEgJj7BlHDHcYpOXfsuQxnKXeeCw6dVmyYfPAgMBAAGjczBxMAkGA1UdEwQC
25
+ MAAwCwYDVR0PBAQDAgSwMB0GA1UdDgQWBBTqBx17iPvliZ0RKL1RZluB50Uc/jAb
26
+ BgNVHREEFDASgRB6aG91YnhAZ21haWwuY29tMBsGA1UdEgQUMBKBEHpob3VieEBn
27
+ bWFpbC5jb20wDQYJKoZIhvcNAQELBQADggGBAF+XmQPhEcuiRl/Zb+1vIBnR8Z8Q
28
+ mTJm4inGy8boLAz71Pg04yI+moj7A0CSS/xbv1MtZqOzTxGFm16xLczY4k6BuRof
29
+ C9usw3Z7val3rHRzmdhLqkX4ALrLzd/G0uirARIQ08N5r0ETIkNFCdePtRHH8uk3
30
+ Er0v4v4QT9ATyrAs4iImWlhmWZIEXOdhBqW01c9/cm5Gcg/WBjdEplpJtGGnMkdc
31
+ zQQscleWWbCX7QKjR9rSCPNteUuzAoS8gNCbLNQF88aOFzm704dePli3WNL/GTvy
32
+ frO5NqUXpwCOzgCbMKfxYBw87WDA8yAE2Cn3Kw1bvX/fheUNr++QyVY8sqVDOlUC
33
+ TovfZCtNma/D0DD8q8enl7UEwKWEs3IC+8bu6jlzYn/6BcLTkKwzB8NXSLVGkFva
34
+ xyQse8slbARanDyyT6HCYtlaKD9WYw647tkJhyY2GniloCNcOJS0URvhlulLzJYg
35
+ 9CrAxVAO2NMeM6tbxg1VjYbqopjoLCwLdAJouA==
36
+ -----END CERTIFICATE-----
37
+ date: 2025-11-12 00:00:00.000000000 Z
11
38
  dependencies:
12
39
  - !ruby/object:Gem::Dependency
13
40
  name: unihan2
@@ -118,7 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
145
  - !ruby/object:Gem::Version
119
146
  version: '0'
120
147
  requirements: []
121
- rubygems_version: 3.6.9
148
+ rubygems_version: 3.7.2
122
149
  specification_version: 4
123
150
  summary: CBETA Tools
124
151
  test_files: []
metadata.gz.sig ADDED
Binary file