cbeta 3.6.1 → 3.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cbeta/p5a_checker.rb +42 -14
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 644f49e37566f3f90599849baa5b31c2df1a0ca28c76c530ec42ec8e839c5da5
|
4
|
+
data.tar.gz: 2293f8fcf48f5aceecf09e2731ef4c3825f6bc80ac97249105a8e435f1ad9100
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cfdc23cf230a4e60a00085dade6a73f09352bc4ec0b24c29aaadb94c95e11fa692a3c4f1b7fb82c398b59238e36be365f65e0f0c8d67dcd52f24246318104d84
|
7
|
+
data.tar.gz: 272b18af22f0a121ba9d94311588602b1e4435782227759e6ca49d09c39cd42eb7cac56895a92671d598b218949810a832e60b201fa86e54706311f3bbef8eea
|
data/lib/cbeta/p5a_checker.rb
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
require_relative 'cbeta_share'
|
2
2
|
|
3
3
|
# 檢查 CBETA XML P5a
|
4
|
+
# 錯誤類型
|
5
|
+
# [E01] 行號重複
|
6
|
+
# [E02] 文字直接出現在 div 下
|
7
|
+
# [E03] 星號校勘 app 沒有對應的 note
|
8
|
+
# 警告類型
|
9
|
+
# [W01] 夾注包夾注
|
4
10
|
class CBETA::P5aChecker
|
5
11
|
# @param xml_root [String] 來源 CBETA XML P5a 路徑
|
6
12
|
# @param figures [String] 插圖 路徑 (可由 https://github.com/cbeta-git/CBR2X-figures 取得)
|
@@ -10,11 +16,11 @@ class CBETA::P5aChecker
|
|
10
16
|
@xml_root = xml_root
|
11
17
|
@figures = figures
|
12
18
|
@log = log
|
19
|
+
@errors = ''
|
20
|
+
@g_errors = {}
|
13
21
|
end
|
14
22
|
|
15
23
|
def check
|
16
|
-
@errors = ''
|
17
|
-
@g_errors = {}
|
18
24
|
puts "xml: #{@xml_root}"
|
19
25
|
each_canon(@xml_root) do |c|
|
20
26
|
@canon = c
|
@@ -22,6 +28,26 @@ class CBETA::P5aChecker
|
|
22
28
|
handle_canon(path)
|
23
29
|
end
|
24
30
|
|
31
|
+
display_errors
|
32
|
+
end
|
33
|
+
|
34
|
+
def check_file(fn)
|
35
|
+
handle_file(fn)
|
36
|
+
display_errors
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
include CbetaShare
|
42
|
+
|
43
|
+
def chk_text(node)
|
44
|
+
return if node.text.strip.empty?
|
45
|
+
if node.parent.name == 'div'
|
46
|
+
error "lb: #{@lb}, text: #{node.text.inspect}", type: "[E02] 文字直接出現在 div 下"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def display_errors
|
25
51
|
@g_errors.keys.sort.each do |k|
|
26
52
|
s = @g_errors[k].to_a.join(',')
|
27
53
|
@errors << "#{k} 無缺字資料,出現於:#{s}\n"
|
@@ -37,17 +63,6 @@ class CBETA::P5aChecker
|
|
37
63
|
puts "\n發現錯誤,請查看 #{@log}"
|
38
64
|
end
|
39
65
|
end
|
40
|
-
|
41
|
-
private
|
42
|
-
|
43
|
-
include CbetaShare
|
44
|
-
|
45
|
-
def chk_text(node)
|
46
|
-
return if node.text.strip.empty?
|
47
|
-
if node.parent.name == 'div'
|
48
|
-
error "lb: #{@lb}, text: #{node.text.inspect}", type: "[E02] 文字直接出現在 div 下"
|
49
|
-
end
|
50
|
-
end
|
51
66
|
|
52
67
|
def e_app(e)
|
53
68
|
if e['type'] == 'star'
|
@@ -99,6 +114,16 @@ class CBETA::P5aChecker
|
|
99
114
|
traverse(e)
|
100
115
|
end
|
101
116
|
|
117
|
+
def e_note(e)
|
118
|
+
return unless e['place'] == 'inline'
|
119
|
+
if @element_stack.include?('inline_note')
|
120
|
+
@errors << "[W01] 夾注包夾注: #{@basename}, lb: #{@lb}\n"
|
121
|
+
end
|
122
|
+
@element_stack << 'inline_note'
|
123
|
+
traverse(e)
|
124
|
+
@element_stack.pop
|
125
|
+
end
|
126
|
+
|
102
127
|
def e_rdg(e)
|
103
128
|
return if e['type'] == 'cbetaRemark'
|
104
129
|
unless e.key?('wit')
|
@@ -126,7 +151,8 @@ class CBETA::P5aChecker
|
|
126
151
|
|
127
152
|
def handle_file(fn)
|
128
153
|
@basename = File.basename(fn)
|
129
|
-
|
154
|
+
@canon ||= CBETA.get_canon_id_from_linehead(@basename)
|
155
|
+
|
130
156
|
s = File.read(fn)
|
131
157
|
if s.include? "\u200B"
|
132
158
|
@errors << "#{@basename} 含有 U+200B Zero Width Space 字元\n"
|
@@ -137,6 +163,7 @@ class CBETA::P5aChecker
|
|
137
163
|
doc.remove_namespaces!
|
138
164
|
@lbs = Set.new
|
139
165
|
read_notes(doc)
|
166
|
+
@element_stack = []
|
140
167
|
traverse(doc.root)
|
141
168
|
else
|
142
169
|
@errors << "錯誤: #{@basename} not well-formed\n"
|
@@ -150,6 +177,7 @@ class CBETA::P5aChecker
|
|
150
177
|
when 'graphic' then e_graphic(e)
|
151
178
|
when 'lb' then e_lb(e)
|
152
179
|
when 'lem' then e_lem(e)
|
180
|
+
when 'note' then e_note(e)
|
153
181
|
when 'rdg' then e_rdg(e)
|
154
182
|
else traverse(e)
|
155
183
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.6.1
|
4
|
+
version: 3.6.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-04-
|
11
|
+
date: 2025-04-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unihan2
|