cbeta 3.6.2 → 3.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cbeta/p5a_checker.rb +55 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49e62ac8c7805abccecdd47d26c301797f3b5262659f1aca46dd4d314dfd50ae
|
4
|
+
data.tar.gz: 72e3a47fd5fe23c0ecef82d9cbcbfd9d1c4079a41133c1325d1f5220b68f24a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab60c18e220fad95a78aac7700726a2e629759933cac1b489f05890141ffdaa770a650479e56f8316b65592979ba9241a9a8f1015070af8883e0f19744b526bd
|
7
|
+
data.tar.gz: e5cd339be96a043c1186e2968cf12f6c91f779039130024423859729d58c180fa262a6c5d3447b85ebecb0eaf81b5513f48f24a0d2f8275d21a2ac779f189055
|
data/lib/cbeta/p5a_checker.rb
CHANGED
@@ -1,7 +1,15 @@
|
|
1
1
|
require_relative 'cbeta_share'
|
2
2
|
|
3
3
|
# 檢查 CBETA XML P5a
|
4
|
+
#
|
5
|
+
# * 錯誤類型
|
6
|
+
# * [E01] 行號重複
|
7
|
+
# * [E02] 文字直接出現在 div 下
|
8
|
+
# * [E03] 星號校勘 app 沒有對應的 note
|
9
|
+
# * 警告類型
|
10
|
+
# * [W01] 夾注包夾注
|
4
11
|
class CBETA::P5aChecker
|
12
|
+
|
5
13
|
# @param xml_root [String] 來源 CBETA XML P5a 路徑
|
6
14
|
# @param figures [String] 插圖 路徑 (可由 https://github.com/cbeta-git/CBR2X-figures 取得)
|
7
15
|
# @param log [String] Log file path
|
@@ -14,6 +22,13 @@ class CBETA::P5aChecker
|
|
14
22
|
@g_errors = {}
|
15
23
|
end
|
16
24
|
|
25
|
+
# 檢查全部 CBETA XML P5a
|
26
|
+
# @example
|
27
|
+
# CBETA::P5aChecker.new(
|
28
|
+
# xml_root: '~/git-repos/cbeta-xml-p5a',
|
29
|
+
# figures: '~/git-repos/CBR2X-figures',
|
30
|
+
# log: '~/log/check-cbeta-xml.log'
|
31
|
+
# ).check
|
17
32
|
def check
|
18
33
|
puts "xml: #{@xml_root}"
|
19
34
|
each_canon(@xml_root) do |c|
|
@@ -25,6 +40,31 @@ class CBETA::P5aChecker
|
|
25
40
|
display_errors
|
26
41
|
end
|
27
42
|
|
43
|
+
# 檢查某部藏經
|
44
|
+
# @param canon [String] 藏經 ID, example: "T"
|
45
|
+
def check_canon(canon)
|
46
|
+
@canon = canon
|
47
|
+
path = File.join(@xml_root, @canon)
|
48
|
+
handle_canon(path)
|
49
|
+
display_errors
|
50
|
+
end
|
51
|
+
|
52
|
+
# 檢查某一冊
|
53
|
+
# @param vol [String] 冊號, example: "T01"
|
54
|
+
def check_vol(vol)
|
55
|
+
@vol = vol
|
56
|
+
@canon = CBETA.get_canon_from_vol(vol)
|
57
|
+
path = File.join(@xml_root, @canon, vol)
|
58
|
+
handle_vol(path)
|
59
|
+
display_errors
|
60
|
+
end
|
61
|
+
|
62
|
+
# 檢查單一檔案
|
63
|
+
# @example
|
64
|
+
# CBETA::P5aChecker.new(
|
65
|
+
# figures: '~/git-repos/CBR2X-figures',
|
66
|
+
# log: '~/log/check-cbeta-xml.log'
|
67
|
+
# ).check_file('~/git-repos/cbeta-xml-p5a/A/A110/A110n1490.xml')
|
28
68
|
def check_file(fn)
|
29
69
|
handle_file(fn)
|
30
70
|
display_errors
|
@@ -50,11 +90,11 @@ class CBETA::P5aChecker
|
|
50
90
|
if @errors.empty?
|
51
91
|
puts "檢查完成,未發現錯誤。"
|
52
92
|
elsif @log.nil?
|
53
|
-
puts "
|
93
|
+
puts "發現錯誤:"
|
54
94
|
puts @errors
|
55
95
|
else
|
56
96
|
File.write(@log, @errors)
|
57
|
-
puts "
|
97
|
+
puts "發現錯誤,請查看 #{@log}"
|
58
98
|
end
|
59
99
|
end
|
60
100
|
|
@@ -108,6 +148,16 @@ class CBETA::P5aChecker
|
|
108
148
|
traverse(e)
|
109
149
|
end
|
110
150
|
|
151
|
+
def e_note(e)
|
152
|
+
return unless e['place'] == 'inline'
|
153
|
+
if @element_stack.include?('inline_note')
|
154
|
+
@errors << "[W01] 夾注包夾注: #{@basename}, lb: #{@lb}\n"
|
155
|
+
end
|
156
|
+
@element_stack << 'inline_note'
|
157
|
+
traverse(e)
|
158
|
+
@element_stack.pop
|
159
|
+
end
|
160
|
+
|
111
161
|
def e_rdg(e)
|
112
162
|
return if e['type'] == 'cbetaRemark'
|
113
163
|
unless e.key?('wit')
|
@@ -127,7 +177,6 @@ class CBETA::P5aChecker
|
|
127
177
|
Dir.entries(folder).sort.each do |f|
|
128
178
|
next if f.start_with? '.'
|
129
179
|
@vol = f
|
130
|
-
$stderr.print "#{@vol} "
|
131
180
|
path = File.join(folder, @vol)
|
132
181
|
handle_vol(path)
|
133
182
|
end
|
@@ -147,6 +196,7 @@ class CBETA::P5aChecker
|
|
147
196
|
doc.remove_namespaces!
|
148
197
|
@lbs = Set.new
|
149
198
|
read_notes(doc)
|
199
|
+
@element_stack = []
|
150
200
|
traverse(doc.root)
|
151
201
|
else
|
152
202
|
@errors << "錯誤: #{@basename} not well-formed\n"
|
@@ -160,12 +210,14 @@ class CBETA::P5aChecker
|
|
160
210
|
when 'graphic' then e_graphic(e)
|
161
211
|
when 'lb' then e_lb(e)
|
162
212
|
when 'lem' then e_lem(e)
|
213
|
+
when 'note' then e_note(e)
|
163
214
|
when 'rdg' then e_rdg(e)
|
164
215
|
else traverse(e)
|
165
216
|
end
|
166
217
|
end
|
167
218
|
|
168
219
|
def handle_vol(folder)
|
220
|
+
puts "check vol: #{File.basename(folder)}"
|
169
221
|
Dir.entries(folder).sort.each do |f|
|
170
222
|
next if f.start_with? '.'
|
171
223
|
path = File.join(folder, f)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.6.2
|
4
|
+
version: 3.6.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-04-
|
11
|
+
date: 2025-04-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unihan2
|