cbeta 3.5.5 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cbeta/cbeta_share.rb +7 -1
- data/lib/cbeta/p5a_checker.rb +162 -0
- data/lib/cbeta.rb +1 -0
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d57589aae785253515b3d2e73f7e5ee0f2267eb5cfd78ffc07a31cda853e0dc4
|
4
|
+
data.tar.gz: 636713370e0c81c2b25121be8aa58f4d959eb24c0297f64d6a6300431848b805
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de9137c36aad2f633b1b682d7b22a32d99d260b7e2d5a506bd404a2a13596098755884da9292dacf6692f9751b1744c9f8eccdd5aacc8670c66cadd0ab94a287
|
7
|
+
data.tar.gz: 8885756792aeb5e473fbf4ffa638c03ba330156a10e7f800464146c93b81defb9a7b8ed5103c6b9cee010c6714b3e361eb1ae0e685dd9f94fc29eb9f2dab1fdc
|
data/lib/cbeta/cbeta_share.rb
CHANGED
@@ -0,0 +1,162 @@
|
|
1
|
+
require_relative 'cbeta_share'
|
2
|
+
|
3
|
+
# 檢查 CBETA XML P5a
|
4
|
+
class CBETA::P5aChecker
|
5
|
+
# Stores the checker's input locations and loads the gaiji lookup.
#
# @param xml_root [String] path to the source CBETA XML P5a folder
# @param figures [String] path to the figures folder (available from https://github.com/cbeta-git/CBR2X-figures)
# @param log [String] Log file path
def initialize(xml_root: nil, figures: nil, log: nil)
  @xml_root, @figures, @log = xml_root, figures, log
  @gaijis = CBETA::Gaiji.new
end
|
14
|
+
|
15
|
+
# Runs the check over every canon yielded by each_canon and reports the result.
#
# Resets @errors and @g_errors, passes each canon folder under @xml_root to
# handle_canon, then appends one line per key of @g_errors (in sorted key
# order) to @errors. With no errors a completion message is printed. With
# errors they are printed to stdout when no log path was given, otherwise
# written to the log file and only a pointer to that file is printed.
def check
  @errors = ''
  @g_errors = {}
  puts "xml: #{@xml_root}"

  each_canon(@xml_root) do |canon|
    @canon = canon
    handle_canon(File.join(@xml_root, @canon))
  end

  # Keys are unique, so ordering the pairs by key matches sorting the keys.
  @g_errors.sort_by { |gid, _files| gid }.each do |gid, files|
    @errors << "#{gid} 無缺字資料,出現於:#{files.to_a.join(',')}\n"
  end

  return puts("檢查完成,未發現錯誤。") if @errors.empty?

  if @log.nil?
    puts "\n發現錯誤:"
    puts @errors
  else
    File.write(@log, @errors)
    puts "\n發現錯誤,請查看 #{@log}"
  end
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
include CbetaShare
|
44
|
+
|
45
|
+
# Reports non-blank text whose parent element is a div (error type E02).
#
# @param node text node being visited; its text and its parent's name are read
def chk_text(node)
  content = node.text
  return if content.strip.empty?
  return unless node.parent.name == 'div'

  error "lb: #{@lb}, text: #{content.inspect}", type: "[E02] 文字直接出現在 div 下"
end
|
51
|
+
|
52
|
+
# Checks a g element: its gaiji id must be known to @gaijis.
#
# The id is the ref attribute without its first character (presumably a
# leading '#' -- confirm against the P5a files). Unknown ids are collected in
# @g_errors as id => Set of file basenames; `check` turns them into error lines.
#
# @param e element (anything answering e['ref'])
def e_g(e)
  ref = e['ref']
  if ref.nil? || ref.empty?
    # Without this guard a missing ref raises NoMethodError on nil, and an
    # empty ref produces a nil id that later breaks the sorted report.
    error "g 缺少 ref 屬性, lb: #{@lb}"
    return
  end

  gid = ref[1..-1]
  return if @gaijis.key?(gid)

  (@g_errors[gid] ||= Set.new) << @basename
end
|
59
|
+
|
60
|
+
# Checks a graphic element: the referenced image must exist on disk.
#
# Only the basename of the url attribute is used; the file is looked up at
# <@figures>/<@canon>/<basename>.
#
# @param e element (anything answering e['url'])
def e_graphic(e)
  url = e['url']
  if url.nil?
    # File.basename(nil) would raise TypeError and abort the whole run.
    error "graphic 缺少 url 屬性, lb: #{@lb}"
    return
  end

  # `figures` defaults to nil in initialize; with no figures folder there is
  # nothing to compare against, so the existence check is skipped.
  return if @figures.nil?

  name = File.basename(url)
  path = File.join(@figures, @canon, name)
  error "圖檔 #{name} 不存在" unless File.exist?(path)
end
|
67
|
+
|
68
|
+
# Checks an lb element: format of its n attribute and uniqueness per edition.
#
# Elements with type="old" are ignored. A valid n is one lowercase letter or
# digit, three digits, one lowercase letter, then one or more digits. The
# value is remembered in @lb so later messages can cite it. The pair ed + n
# must be unique within the current file (@lbs); a repeat is reported as E01
# unless ed starts with 'R'.
#
# @param e element (anything answering e['type'], e['n'], e['ed'])
def e_lb(e)
  return if e['type'] == 'old'

  n = e['n']
  if n.nil?
    # nil.match would raise NoMethodError and abort the whole run.
    error "lb 缺少 n 屬性"
    return
  end

  # \A and \z anchor the whole value; ^ and $ only anchor a line, so a value
  # with a trailing newline or extra lines would slip through.
  error "lb format error: #{n}" unless n.match?(/\A[a-z\d]\d{3}[a-z]\d+\z/)

  @lb = n
  ed = e['ed'].to_s # ed may be absent; nil.start_with? would raise below
  # Set#add? returns nil only when the key was already present.
  return if @lbs.add?("#{ed}#{@lb}")

  error "lb: #{@lb}, ed: #{ed}", type: "[E01] 行號重複" unless ed.start_with?('R')
end
|
84
|
+
|
85
|
+
# Checks a lem element: reports an error when it has no wit attribute.
#
# @param e element (anything answering key?)
def e_lem(e)
  return if e.key?('wit')

  error "lem 缺少 wit 屬性"
end
|
90
|
+
|
91
|
+
# Checks an rdg element: reports an error when it has no wit attribute.
# Elements with type="cbetaRemark" are exempt.
#
# @param e element (anything answering e['type'] and key?)
def e_rdg(e)
  return if e['type'] == 'cbetaRemark' || e.key?('wit')

  error "rdg 缺少 wit 屬性, lb: #{@lb}"
end
|
97
|
+
|
98
|
+
# Formats one error line, prints it to stdout and appends it to @errors.
#
# The line reads "<type>: <basename>, <msg>"; the "<type>: " prefix is left
# out when type is nil.
#
# @param msg [String] description of the problem
# @param type [String, nil] optional label placed in front of the line
def error(msg, type: nil)
  prefix = type.nil? ? '' : "#{type}: "
  line = "#{prefix}#{@basename}, #{msg}"
  puts line
  @errors << "#{line}\n"
end
|
105
|
+
|
106
|
+
# Processes the entries of one canon folder in sorted name order.
#
# Entries whose name starts with '.' are skipped. Every other entry name is
# stored in @vol, echoed to stderr, and its path is passed to handle_vol.
#
# @param folder [String] path of the canon folder
def handle_canon(folder)
  Dir.entries(folder).sort.each do |entry|
    unless entry.start_with?('.')
      @vol = entry
      $stderr.puts "#{@vol} "
      handle_vol(File.join(folder, @vol))
    end
  end
end
|
115
|
+
|
116
|
+
# Checks a single XML file.
#
# Remembers the file's basename for error messages, flags any U+200B
# character in the raw content, then parses the content with Nokogiri. A
# document with parse errors is reported as not well-formed and not inspected
# further; otherwise namespaces are removed, the per-file lb set is reset and
# the tree is walked from the root.
#
# @param fn [String] path of the XML file
def handle_file(fn)
  @basename = File.basename(fn)

  content = File.read(fn)
  @errors << "#{@basename} 含有 U+200B Zero Width Space 字元\n" if content.include?("\u200B")

  doc = Nokogiri::XML(content)
  if doc.errors.any?
    @errors << "錯誤: #{@basename} not well-formed\n"
  else
    doc.remove_namespaces!
    @lbs = Set.new
    traverse(doc.root)
  end
end
|
133
|
+
|
134
|
+
# Routes an element to the check for its tag name; any other element is
# simply descended into via traverse.
#
# NOTE(review): the five dedicated checks do not call traverse, so children
# of g, graphic, lb, lem and rdg are never visited -- confirm this is intended.
#
# @param e element to dispatch on (its name is read)
def handle_node(e)
  checker = {
    'g' => :e_g,
    'graphic' => :e_graphic,
    'lb' => :e_lb,
    'lem' => :e_lem,
    'rdg' => :e_rdg
  }.fetch(e.name, :traverse)
  send(checker, e)
end
|
144
|
+
|
145
|
+
# Processes the entries of one volume folder in sorted name order, passing
# the path of every entry whose name does not start with '.' to handle_file.
#
# @param folder [String] path of the volume folder
def handle_vol(folder)
  Dir.entries(folder).sort.each do |entry|
    handle_file(File.join(folder, entry)) unless entry.start_with?('.')
  end
end
|
152
|
+
|
153
|
+
# Visits the direct children of a node: text children go to chk_text,
# element children go to handle_node, anything else is ignored.
#
# @param e parent node whose children are visited
def traverse(e)
  e.children.each do |c|
    if c.text?
      chk_text(c)
    elsif c.element? # test the child, not the parent: the parent is always an element here
      handle_node(c)
    end
  end
end
|
162
|
+
end
|
data/lib/cbeta.rb
CHANGED
@@ -282,6 +282,7 @@ require 'cbeta/canon'
|
|
282
282
|
require 'cbeta/char_count'
|
283
283
|
require 'cbeta/char_freq'
|
284
284
|
require 'cbeta/html_to_pdf'
|
285
|
+
require 'cbeta/p5a_checker'
|
285
286
|
require 'cbeta/p5a_to_html'
|
286
287
|
require 'cbeta/p5a_to_html_for_every_edition'
|
287
288
|
require 'cbeta/p5a_to_html_for_pdf'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.5.5
|
4
|
+
version: 3.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-04-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unihan2
|
@@ -16,20 +16,20 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.2'
|
20
20
|
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: 1.
|
22
|
+
version: 1.2.0
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
27
|
- - "~>"
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: '1.
|
29
|
+
version: '1.2'
|
30
30
|
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 1.
|
32
|
+
version: 1.2.0
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: nokogiri
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -85,6 +85,7 @@ files:
|
|
85
85
|
- lib/cbeta/gaiji.rb
|
86
86
|
- lib/cbeta/html_to_pdf.rb
|
87
87
|
- lib/cbeta/html_to_text.rb
|
88
|
+
- lib/cbeta/p5a_checker.rb
|
88
89
|
- lib/cbeta/p5a_to_html.rb
|
89
90
|
- lib/cbeta/p5a_to_html_for_every_edition.rb
|
90
91
|
- lib/cbeta/p5a_to_html_for_pdf.rb
|