cbeta 3.5.5 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2eb898277b45bb91b77ad88a0a064ec1780647cfbb8956fa6d407aa5250fef6d
4
- data.tar.gz: 70d0c1f69410cff20427dc4b0cb5016c11a360c47b2fd0f24e0b38d84b9b6068
3
+ metadata.gz: 5d0afcec06986d4abb932de0ec7095d8ecda4eeb3b7827798f1bf67e830baeea
4
+ data.tar.gz: 5af3540b82784566fefe59821c507cc36c66b56ca7855056573f66b31af14f14
5
5
  SHA512:
6
- metadata.gz: 2e37c7495f13476adf58ad47eb5cb435402085af2dbca6b20821b63f23b00c907c2fca028035b1ba1e365adddcb04fd281df6f04daa13a8c2132a94bd33fc1d1
7
- data.tar.gz: 7dbb43982e33c8a5d533d8fee814f418aebf0a2e8c40df9d59a68bd726f9de67ddb8c15aee521f37c14c8e792cc0f53ca2035598819076c630468b25d0fb4c9e
6
+ metadata.gz: 063bbde7b625bc97bb84a16eb47acb2da82876ab9e2474aec105e1da1dbe19e1f0a5e925f7c02ea1f59816429bc4d9f247f3e791e4137c4d870973f8946dce05
7
+ data.tar.gz: c274c8605e5cc1249edd667660d88314772bbb88c4e3c96342993ddc6840ceb25a9b28c87d7ed544f099890d90d99d1dded1068c67d65df1a6c455b877b4533c
@@ -1,4 +1,10 @@
1
1
  module CbetaShare
2
# Yields the name of each entry under +xml_root+, in sorted order, whose
# whole name matches the CBETA::CANON pattern.
#
# @param xml_root [String] folder whose entries are scanned
# @yieldparam c [String] name of a matching entry
def each_canon(xml_root)
  canon_pattern = /^#{CBETA::CANON}$/
  Dir.entries(xml_root).sort.each do |entry|
    yield(entry) if entry.match(canon_pattern)
  end
end
2
8
 
3
9
  def to_html(e)
4
10
  e.to_xml(
@@ -8,4 +14,4 @@ module CbetaShare
8
14
  )
9
15
  end
10
16
 
11
- end
17
+ end
@@ -0,0 +1,184 @@
1
+ require_relative 'cbeta_share'
2
+
3
# Checks CBETA XML P5a source files and reports the problems it finds.
class CBETA::P5aChecker
  include CbetaShare

  # Expected shape of the n attribute of an lb element.
  LB_FORMAT = /\A[a-z\d]\d{3}[a-z]\d+\z/
  private_constant :LB_FORMAT

  # @param xml_root [String] path of the source CBETA XML P5a folder
  # @param figures [String] path of the figures folder
  #   (available from https://github.com/cbeta-git/CBR2X-figures)
  # @param log [String] log file path
  def initialize(xml_root: nil, figures: nil, log: nil)
    @gaijis = CBETA::Gaiji.new
    @xml_root = xml_root
    @figures = figures
    @log = log
  end

  # Walks every canon folder under xml_root, checks each file in it, and
  # then reports the collected errors: printed to stdout, or written to the
  # log file when one was given.
  def check
    @errors = +''
    @g_errors = {}
    puts "xml: #{@xml_root}"
    each_canon(@xml_root) do |c|
      @canon = c
      handle_canon(File.join(@xml_root, @canon))
    end

    # Gaiji references without data are grouped per gaiji id and listed once,
    # together with every file they were seen in.
    @g_errors.keys.sort.each do |gid|
      files = @g_errors[gid].to_a.join(',')
      @errors << "#{gid} 無缺字資料,出現於:#{files}\n"
    end

    report_errors
  end

  private

  # Prints the final summary, or writes the errors to the log file.
  def report_errors
    if @errors.empty?
      puts "檢查完成,未發現錯誤。"
    elsif @log.nil?
      puts "\n發現錯誤:"
      puts @errors
    else
      File.write(@log, @errors)
      puts "\n發現錯誤,請查看 #{@log}"
    end
  end

  # Reports non-blank text whose parent element is a div (E02).
  def chk_text(node)
    return if node.text.strip.empty?
    return unless node.parent.name == 'div'

    error "lb: #{@lb}, text: #{node.text.inspect}", type: "[E02] 文字直接出現在 div 下"
  end

  # For an app of type "star", reports a corresp target that is not the n of
  # any note in the document (E03); then descends into the children.
  def e_app(e)
    if e['type'] == 'star'
      # to_s: a missing corresp is reported as E03 instead of raising.
      n = e['corresp'].to_s.delete_prefix('#')
      unless @notes.include?(n)
        error "lb: #{@lb}, corresp: #{n}", type: "[E03] 星號校勘 app 沒有對應的 note"
      end
    end
    traverse(e)
  end

  # Records the current file under the gaiji id of a g element when the
  # gaiji table has no entry for that id.
  def e_g(e)
    ref = e['ref']
    if ref.nil? || ref.empty?
      # Without a ref there is no id to look up; report it rather than crash.
      error "g 缺少 ref 屬性, lb: #{@lb}"
      return
    end

    gid = ref[1..-1] # drop the first character of the ref
    return if @gaijis.key?(gid)

    (@g_errors[gid] ||= Set.new) << @basename
  end

  # Reports a graphic whose image file is not found under
  # figures/<canon>/<basename of url>.
  def e_graphic(e)
    url = e['url']
    if url.nil?
      error "graphic 缺少 url 屬性, lb: #{@lb}"
      return
    end
    # figures is optional in the constructor; with no folder there is nothing
    # to compare against, so the existence check is skipped.
    return if @figures.nil?

    name = File.basename(url)
    fn = File.join(@figures, @canon, name)
    error "圖檔 #{name} 不存在" unless File.exist?(fn)
  end

  # Validates the n attribute of an lb, remembers it as the current line, and
  # reports a repeated ed+n pair (E01) unless the ed value starts with "R".
  # lb elements of type "old" are ignored.
  def e_lb(e)
    return if e['type'] == 'old'

    n = e['n']
    error "lb format error: #{n}" unless LB_FORMAT.match?(n.to_s)

    @lb = n
    ed = e['ed'].to_s
    # Set#add? returns nil when the key was already present.
    return if @lbs.add?("#{ed}#{@lb}")

    error "lb: #{@lb}, ed: #{ed}", type: "[E01] 行號重複" unless ed.start_with?('R')
  end

  # Reports a lem without a wit attribute, then descends into its children.
  def e_lem(e)
    error "lem 缺少 wit 屬性" unless e.key?('wit')
    traverse(e)
  end

  # Reports an rdg without a wit attribute; rdg of type "cbetaRemark" is
  # exempt. The children of an rdg are not visited.
  def e_rdg(e)
    return if e['type'] == 'cbetaRemark'

    error "rdg 缺少 wit 屬性, lb: #{@lb}" unless e.key?('wit')
  end

  # Prints one error line, prefixed with the optional type label and the
  # current file name, and appends it to the collected errors.
  #
  # @param msg [String] error description
  # @param type [String, nil] optional label placed in front of the message
  def error(msg, type: nil)
    prefix = type.nil? ? '' : "#{type}: "
    line = "#{prefix}#{@basename}, #{msg}"
    puts line
    @errors << line << "\n"
  end

  # Processes every entry of a canon folder whose name does not start with a
  # dot as a volume folder, printing each name to stderr as progress output.
  def handle_canon(folder)
    Dir.entries(folder).sort.each do |f|
      next if f.start_with?('.')

      @vol = f
      $stderr.print "#{@vol} "
      handle_vol(File.join(folder, @vol))
    end
  end

  # Checks one file: flags U+200B characters, parses the XML, and walks the
  # tree when the parser reported no errors.
  def handle_file(fn)
    @basename = File.basename(fn)
    # Start each file without a current line number so that messages never
    # cite an lb left over from the previously checked file.
    @lb = nil

    s = File.read(fn)
    if s.include? "\u200B"
      @errors << "#{@basename} 含有 U+200B Zero Width Space 字元\n"
    end

    doc = Nokogiri::XML(s)
    if doc.errors.empty?
      doc.remove_namespaces!
      @lbs = Set.new
      read_notes(doc)
      traverse(doc.root)
    else
      @errors << "錯誤: #{@basename} not well-formed\n"
    end
  end

  # Dispatches an element to its checker by name; any other element is only
  # descended into.
  def handle_node(e)
    case e.name
    when 'app'     then e_app(e)
    when 'g'       then e_g(e)
    when 'graphic' then e_graphic(e)
    when 'lb'      then e_lb(e)
    when 'lem'     then e_lem(e)
    when 'rdg'     then e_rdg(e)
    else traverse(e)
    end
  end

  # Checks every entry of a volume folder whose name does not start with a
  # dot.
  def handle_vol(folder)
    Dir.entries(folder).sort.each do |f|
      next if f.start_with?('.')

      handle_file(File.join(folder, f))
    end
  end

  # Collects the n attribute of every note in the document; e_app looks
  # corresp targets up in this set.
  def read_notes(doc)
    @notes = Set.new
    doc.xpath('//note').each do |e|
      @notes << e['n'] if e.key?('n')
    end
  end

  # Visits the children of +e+: text nodes go to chk_text, element nodes to
  # handle_node, and any other node kind is ignored.
  def traverse(e)
    e.children.each do |c|
      if c.text?
        chk_text(c)
      elsif c.element? # test the child itself, not the parent being walked
        handle_node(c)
      end
    end
  end
end
data/lib/cbeta.rb CHANGED
@@ -282,6 +282,7 @@ require 'cbeta/canon'
282
282
  require 'cbeta/char_count'
283
283
  require 'cbeta/char_freq'
284
284
  require 'cbeta/html_to_pdf'
285
+ require 'cbeta/p5a_checker'
285
286
  require 'cbeta/p5a_to_html'
286
287
  require 'cbeta/p5a_to_html_for_every_edition'
287
288
  require 'cbeta/p5a_to_html_for_pdf'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.5.5
4
+ version: 3.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-03-31 00:00:00.000000000 Z
11
+ date: 2025-04-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unihan2
@@ -16,20 +16,20 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.1'
19
+ version: '1.2'
20
20
  - - ">="
21
21
  - !ruby/object:Gem::Version
22
- version: 1.1.0
22
+ version: 1.2.0
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
27
  - - "~>"
28
28
  - !ruby/object:Gem::Version
29
- version: '1.1'
29
+ version: '1.2'
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
- version: 1.1.0
32
+ version: 1.2.0
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: nokogiri
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -85,6 +85,7 @@ files:
85
85
  - lib/cbeta/gaiji.rb
86
86
  - lib/cbeta/html_to_pdf.rb
87
87
  - lib/cbeta/html_to_text.rb
88
+ - lib/cbeta/p5a_checker.rb
88
89
  - lib/cbeta/p5a_to_html.rb
89
90
  - lib/cbeta/p5a_to_html_for_every_edition.rb
90
91
  - lib/cbeta/p5a_to_html_for_pdf.rb