cbeta 3.6.5 → 3.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cbeta/p5a_checker.rb +64 -16
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 72686576079012f5ab43a671e344ca818f613c0458222b61320578e4753b5667
|
4
|
+
data.tar.gz: a30557a1b5b0a386e5fda0c833fec10ecb033f4968a8ddd300cb4d0bb01d39de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a94542b928cd00a7f92323cb2953132095cd477f9808fcf30ffe002b3163a92312a656711173460c02634100d031113eaeb999e2139b9ab956b2e0a87b4d304
|
7
|
+
data.tar.gz: 4c4d735c996635dbbfe5219c28dfb0b543d7cdd66a6c7c5e2632f920aa4f5c7a1bd3199a3c0fa03775db8a32f18df7830d79b735fec800098f3153ac495487f4
|
data/lib/cbeta/p5a_checker.rb
CHANGED
@@ -6,8 +6,16 @@ require_relative 'cbeta_share'
|
|
6
6
|
# * [E01] 行號重複
|
7
7
|
# * [E02] 文字直接出現在 div 下
|
8
8
|
# * [E03] 星號校勘 app 沒有對應的 note
|
9
|
+
# * [E04] rdg 缺少 wit 屬性"
|
10
|
+
# * [E05] 圖檔 不存在
|
11
|
+
# * [E06] lb format error
|
12
|
+
# * [E07] lem 缺少 wit 屬性
|
13
|
+
# * [E08] item 下有多個 list
|
14
|
+
# * [E09] table cols 屬性值錯誤
|
15
|
+
#
|
9
16
|
# * 警告類型
|
10
17
|
# * [W01] 夾注包夾注
|
18
|
+
# * [W02] 出現罕用字元
|
11
19
|
class CBETA::P5aChecker
|
12
20
|
|
13
21
|
# @param xml_root [String] 來源 CBETA XML P5a 路徑
|
@@ -77,7 +85,18 @@ class CBETA::P5aChecker
|
|
77
85
|
def chk_text(node)
|
78
86
|
return if node.text.strip.empty?
|
79
87
|
if node.parent.name == 'div'
|
80
|
-
error "
|
88
|
+
error "[E02] 文字直接出現在 div 下, text: #{node.text.inspect}"
|
89
|
+
end
|
90
|
+
if node.text =~ /(\$|\{|\})/
|
91
|
+
char = $1
|
92
|
+
|
93
|
+
# 允許的已知用例:
|
94
|
+
# ZW07n0065_p0409a03:{本}續,大分為三。……初對辨題名者,梵云……,此云『吉
|
95
|
+
if char == '{' and @basename == 'ZW07n0065.xml' and @lb == '0409a03'
|
96
|
+
return
|
97
|
+
end
|
98
|
+
|
99
|
+
error "[W02] 出現罕用字元: char: #{char}"
|
81
100
|
end
|
82
101
|
end
|
83
102
|
|
@@ -102,7 +121,7 @@ class CBETA::P5aChecker
|
|
102
121
|
if e['type'] == 'star'
|
103
122
|
n = e['corresp'].delete_prefix('#')
|
104
123
|
unless @notes.include?(n)
|
105
|
-
error "
|
124
|
+
error "[E03] 星號校勘 app 沒有對應的 note, corresp: #{n}"
|
106
125
|
end
|
107
126
|
end
|
108
127
|
traverse(e)
|
@@ -120,22 +139,30 @@ class CBETA::P5aChecker
|
|
120
139
|
url = File.basename(e['url'])
|
121
140
|
fn = File.join(@figures, @canon, url)
|
122
141
|
unless File.exist? fn
|
123
|
-
error "圖檔 #{url}
|
142
|
+
error "[E05] 圖檔 不存在, url: #{url}"
|
124
143
|
end
|
125
144
|
end
|
145
|
+
|
146
|
+
def e_item(e)
|
147
|
+
lists = e.xpath('list')
|
148
|
+
if lists.size > 1
|
149
|
+
error "[E08] item 下有多個 list"
|
150
|
+
end
|
151
|
+
traverse(e)
|
152
|
+
end
|
126
153
|
|
127
154
|
def e_lb(e)
|
128
155
|
return if e['type']=='old'
|
129
156
|
unless e['n'].match(/^[a-z\d]\d{3}[a-z]\d+$/)
|
130
|
-
error "lb format error: #{e['n']}"
|
157
|
+
error "[E06] lb format error: #{e['n']}"
|
131
158
|
end
|
132
159
|
|
160
|
+
return if e['ed'] == 'R'
|
161
|
+
|
133
162
|
@lb = e['n']
|
134
163
|
ed_lb = "#{e['ed']}#{@lb}"
|
135
164
|
if @lbs.include? ed_lb
|
136
|
-
|
137
|
-
error "lb: #{@lb}, ed: #{e['ed']}", type: "[E01] 行號重複"
|
138
|
-
end
|
165
|
+
error "[E01] 行號重複, ed: #{e['ed']}"
|
139
166
|
else
|
140
167
|
@lbs << ed_lb
|
141
168
|
end
|
@@ -143,7 +170,7 @@ class CBETA::P5aChecker
|
|
143
170
|
|
144
171
|
def e_lem(e)
|
145
172
|
unless e.key?('wit')
|
146
|
-
error "lem 缺少 wit 屬性"
|
173
|
+
error "[E07] lem 缺少 wit 屬性"
|
147
174
|
end
|
148
175
|
traverse(e)
|
149
176
|
end
|
@@ -151,7 +178,7 @@ class CBETA::P5aChecker
|
|
151
178
|
def e_note(e)
|
152
179
|
return unless e['place'] == 'inline'
|
153
180
|
if @element_stack.include?('inline_note')
|
154
|
-
|
181
|
+
error "[W01] 夾注包夾注"
|
155
182
|
end
|
156
183
|
@element_stack << 'inline_note'
|
157
184
|
traverse(e)
|
@@ -161,16 +188,35 @@ class CBETA::P5aChecker
|
|
161
188
|
def e_rdg(e)
|
162
189
|
return if e['type'] == 'cbetaRemark'
|
163
190
|
unless e.key?('wit')
|
164
|
-
error "rdg 缺少 wit
|
191
|
+
error "[E04] rdg 缺少 wit 屬性"
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
def e_table(e)
|
196
|
+
max_cols = 0
|
197
|
+
e.xpath('row').each do |row|
|
198
|
+
cols = 0
|
199
|
+
row.xpath('cell').each do |cell|
|
200
|
+
if cell.key?('cols')
|
201
|
+
cols += cell['cols'].to_i
|
202
|
+
else
|
203
|
+
cols += 1
|
204
|
+
end
|
205
|
+
end
|
206
|
+
max_cols = cols if cols > max_cols
|
165
207
|
end
|
208
|
+
|
209
|
+
if e['cols'].to_i != max_cols
|
210
|
+
error "[E09] table cols 屬性值錯誤, table/@cols: #{e['cols']}, 根據 cell 計算的 cols: #{max_cols}"
|
211
|
+
end
|
212
|
+
|
213
|
+
traverse(e)
|
166
214
|
end
|
167
215
|
|
168
|
-
def error(msg
|
169
|
-
s =
|
170
|
-
s
|
171
|
-
|
172
|
-
puts s
|
173
|
-
@errors << s + "\n"
|
216
|
+
def error(msg)
|
217
|
+
s = "#{msg}, #{@basename}, lb: #{@lb}\n"
|
218
|
+
print s
|
219
|
+
@errors << s
|
174
220
|
end
|
175
221
|
|
176
222
|
def handle_canon(folder)
|
@@ -208,10 +254,12 @@ class CBETA::P5aChecker
|
|
208
254
|
when 'app' then e_app(e)
|
209
255
|
when 'g' then e_g(e)
|
210
256
|
when 'graphic' then e_graphic(e)
|
257
|
+
when 'item' then e_item(e)
|
211
258
|
when 'lb' then e_lb(e)
|
212
259
|
when 'lem' then e_lem(e)
|
213
260
|
when 'note' then e_note(e)
|
214
261
|
when 'rdg' then e_rdg(e)
|
262
|
+
when 'table' then e_table(e)
|
215
263
|
else traverse(e)
|
216
264
|
end
|
217
265
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.6.
|
4
|
+
version: 3.6.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-05-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unihan2
|