cbeta 3.5.5 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2eb898277b45bb91b77ad88a0a064ec1780647cfbb8956fa6d407aa5250fef6d
4
- data.tar.gz: 70d0c1f69410cff20427dc4b0cb5016c11a360c47b2fd0f24e0b38d84b9b6068
3
+ metadata.gz: d57589aae785253515b3d2e73f7e5ee0f2267eb5cfd78ffc07a31cda853e0dc4
4
+ data.tar.gz: 636713370e0c81c2b25121be8aa58f4d959eb24c0297f64d6a6300431848b805
5
5
  SHA512:
6
- metadata.gz: 2e37c7495f13476adf58ad47eb5cb435402085af2dbca6b20821b63f23b00c907c2fca028035b1ba1e365adddcb04fd281df6f04daa13a8c2132a94bd33fc1d1
7
- data.tar.gz: 7dbb43982e33c8a5d533d8fee814f418aebf0a2e8c40df9d59a68bd726f9de67ddb8c15aee521f37c14c8e792cc0f53ca2035598819076c630468b25d0fb4c9e
6
+ metadata.gz: de9137c36aad2f633b1b682d7b22a32d99d260b7e2d5a506bd404a2a13596098755884da9292dacf6692f9751b1744c9f8eccdd5aacc8670c66cadd0ab94a287
7
+ data.tar.gz: 8885756792aeb5e473fbf4ffa638c03ba330156a10e7f800464146c93b81defb9a7b8ed5103c6b9cee010c6714b3e361eb1ae0e685dd9f94fc29eb9f2dab1fdc
@@ -1,4 +1,10 @@
1
1
  module CbetaShare
2
# Yields the name of every entry directly under +xml_root+ whose whole name
# matches CBETA::CANON, in sorted order.
#
# @param xml_root [String] folder whose entries are scanned
# @yieldparam c [String] entry name (not the full path)
# @return [Array<String>] the sorted entry list that was iterated
def each_canon(xml_root)
  # The interpolated pattern is wrapped in a non-capturing group so that the
  # anchors apply to the pattern as a whole. Without the group, a pattern with
  # top-level alternation such as "DA|GA|[A-Z]" would parse as
  # (^DA)|(GA)|([A-Z]$) and accept names like "README".
  # \A and \z anchor the whole string rather than a single line.
  canon = /\A(?:#{CBETA::CANON})\z/
  Dir.entries(xml_root).sort.each do |c|
    yield(c) if canon.match?(c)
  end
end
2
8
 
3
9
  def to_html(e)
4
10
  e.to_xml(
@@ -8,4 +14,4 @@ module CbetaShare
8
14
  )
9
15
  end
10
16
 
11
- end
17
+ end
@@ -0,0 +1,162 @@
1
require 'set'
require_relative 'cbeta_share'
2
+
3
# Checks CBETA XML P5a source files.
#
# Every file under +xml_root/<canon>/<volume>/+ is examined for:
# * U+200B Zero Width Space characters in the raw file content
# * well-formedness, as reported by Nokogiri
# * <g> elements whose referenced gaiji has no data
# * <graphic> elements whose image file is missing (only when +figures+ is given)
# * <lb> line-number format, and duplicated line numbers ([E01])
# * <lem> / <rdg> elements without a +wit+ attribute
# * text placed directly under <div> ([E02])
#
# Problems are printed as they are found and also collected; at the end they
# are either printed again as a summary or written to the log file.
class CBETA::P5aChecker
  include CbetaShare

  # Accepted shape of the +n+ attribute of an <lb>, e.g. "0001a01".
  LB_FORMAT = /\A[a-z\d]\d{3}[a-z]\d+\z/.freeze
  private_constant :LB_FORMAT

  # @param xml_root [String] path to the source CBETA XML P5a folder
  # @param figures [String] path to the figures folder
  #   (available from https://github.com/cbeta-git/CBR2X-figures);
  #   when nil, <graphic> files are not checked
  # @param log [String] log file path; when nil, errors are printed instead
  def initialize(xml_root: nil, figures: nil, log: nil)
    @gaijis = CBETA::Gaiji.new
    @xml_root = xml_root
    @figures = figures
    @log = log
  end

  # Runs all checks over every canon folder found under the XML root and
  # reports the outcome (to stdout, or to the log file when one was given).
  #
  # @return [nil]
  def check
    # Unary plus yields an unfrozen string, so << keeps working even if the
    # file is ever switched to frozen string literals.
    @errors = +''
    @g_errors = {}
    puts "xml: #{@xml_root}"
    each_canon(@xml_root) do |c|
      path = File.join(@xml_root, c)
      # A plain file whose name happens to look like a canon id is not a canon.
      next unless File.directory?(path)

      @canon = c
      handle_canon(path)
    end

    # Missing-gaiji problems are gathered per gaiji id across all files and
    # reported once each, listing the files in which the id occurs.
    @g_errors.keys.sort.each do |gid|
      files = @g_errors[gid].to_a.join(',')
      @errors << "#{gid} 無缺字資料,出現於:#{files}\n"
    end

    if @errors.empty?
      puts "檢查完成,未發現錯誤。"
    elsif @log.nil?
      puts "\n發現錯誤:"
      puts @errors
    else
      File.write(@log, @errors)
      puts "\n發現錯誤,請查看 #{@log}"
    end
  end

  private

  # Reports non-blank text whose parent element is a <div> ([E02]).
  def chk_text(node)
    return if node.text.strip.empty?
    return unless node.parent.name == 'div'

    error "lb: #{@lb}, text: #{node.text.inspect}", type: "[E02] 文字直接出現在 div 下"
  end

  # Records the current file under the gaiji id of a <g> element when the
  # gaiji data has no entry for that id.
  def e_g(e)
    ref = e['ref']
    if ref.nil?
      # A checker must report malformed input rather than crash on it.
      error "g 缺少 ref 屬性, lb: #{@lb}"
      return
    end

    # Drops the first character of the reference (presumably a leading '#').
    gid = ref[1..-1]
    return if @gaijis.key?(gid)

    (@g_errors[gid] ||= Set.new) << @basename
  end

  # Reports a <graphic> whose image file does not exist under
  # +figures/<canon>/+. Only the base name of the url is used.
  def e_graphic(e)
    # Without a figures folder there is nothing to compare against.
    return if @figures.nil?

    if e['url'].nil?
      error "graphic 缺少 url 屬性, lb: #{@lb}"
      return
    end

    url = File.basename(e['url'])
    fn = File.join(@figures, @canon, url)
    error "圖檔 #{url} 不存在" unless File.exist?(fn)
  end

  # Validates the line number of an <lb> and reports duplicates ([E01]).
  # <lb type="old"> elements are ignored entirely.
  def e_lb(e)
    return if e['type'] == 'old'

    n = e['n']
    # to_s makes a missing n attribute a reported format error, not a crash.
    error "lb format error: #{n}" unless n.to_s.match?(LB_FORMAT)

    @lb = n
    ed = e['ed'].to_s
    ed_lb = "#{ed}#{@lb}"
    # Set#add? returns nil when the element is already present.
    return if @lbs.add?(ed_lb)

    # Editions whose id starts with 'R' are exempt from the duplicate check.
    error "lb: #{@lb}, ed: #{ed}", type: "[E01] 行號重複" unless ed.start_with?('R')
  end

  # Reports a <lem> that has no wit attribute.
  def e_lem(e)
    error "lem 缺少 wit 屬性" unless e.key?('wit')
  end

  # Reports an <rdg> that has no wit attribute; rdg elements of type
  # "cbetaRemark" are exempt.
  def e_rdg(e)
    return if e['type'] == 'cbetaRemark'

    error "rdg 缺少 wit 屬性, lb: #{@lb}" unless e.key?('wit')
  end

  # Prints one error line, prefixed with the current file name, and appends
  # it to the collected errors.
  #
  # @param msg [String] description of the problem
  # @param type [String, nil] optional error code/label put in front
  def error(msg, type: nil)
    prefix = type.nil? ? '' : "#{type}: "
    line = "#{prefix}#{@basename}, #{msg}"
    puts line
    @errors << "#{line}\n"
  end

  # Processes every volume folder of one canon folder, in sorted order,
  # printing each volume name to stderr as progress output.
  def handle_canon(folder)
    Dir.entries(folder).sort.each do |f|
      next if f.start_with?('.')

      path = File.join(folder, f)
      # Stray files next to the volume folders are not volumes.
      next unless File.directory?(path)

      @vol = f
      $stderr.puts @vol + ' '
      handle_vol(path)
    end
  end

  # Checks a single XML file: raw-content checks first, then, when the
  # document is well-formed, a walk over its element tree.
  def handle_file(fn)
    @basename = File.basename(fn)
    # Line numbers are per file; do not let messages cite the previous file's.
    @lb = nil

    s = File.read(fn)
    if s.include? "\u200B"
      @errors << "#{@basename} 含有 U+200B Zero Width Space 字元\n"
    end

    doc = Nokogiri::XML(s)
    if doc.errors.empty?
      doc.remove_namespaces!
      @lbs = Set.new
      traverse(doc.root)
    else
      @errors << "錯誤: #{@basename} not well-formed\n"
    end
  end

  # Dispatches an element to its specific check, or descends into it when it
  # has no specific check.
  #
  # NOTE(review): the children of g, graphic, lb, lem and rdg are not
  # traversed, so elements nested inside lem/rdg are never checked.
  # Confirm whether that is intended.
  def handle_node(e)
    case e.name
    when 'g'       then e_g(e)
    when 'graphic' then e_graphic(e)
    when 'lb'      then e_lb(e)
    when 'lem'     then e_lem(e)
    when 'rdg'     then e_rdg(e)
    else traverse(e)
    end
  end

  # Processes every file of one volume folder, in sorted order.
  def handle_vol(folder)
    Dir.entries(folder).sort.each do |f|
      next if f.start_with?('.')

      path = File.join(folder, f)
      # Sub-folders cannot be read as XML files.
      next unless File.file?(path)

      handle_file(path)
    end
  end

  # Walks the children of +e+: text nodes get the text check, element nodes
  # are dispatched; every other node kind (comments, processing
  # instructions, ...) is skipped.
  def traverse(e)
    e.children.each do |c|
      if c.text?
        chk_text(c)
      elsif c.element? # test the child itself, not the parent
        handle_node(c)
      end
    end
  end
end
data/lib/cbeta.rb CHANGED
@@ -282,6 +282,7 @@ require 'cbeta/canon'
282
282
  require 'cbeta/char_count'
283
283
  require 'cbeta/char_freq'
284
284
  require 'cbeta/html_to_pdf'
285
+ require 'cbeta/p5a_checker'
285
286
  require 'cbeta/p5a_to_html'
286
287
  require 'cbeta/p5a_to_html_for_every_edition'
287
288
  require 'cbeta/p5a_to_html_for_pdf'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.5.5
4
+ version: 3.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-03-31 00:00:00.000000000 Z
11
+ date: 2025-04-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unihan2
@@ -16,20 +16,20 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.1'
19
+ version: '1.2'
20
20
  - - ">="
21
21
  - !ruby/object:Gem::Version
22
- version: 1.1.0
22
+ version: 1.2.0
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
27
  - - "~>"
28
28
  - !ruby/object:Gem::Version
29
- version: '1.1'
29
+ version: '1.2'
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
- version: 1.1.0
32
+ version: 1.2.0
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: nokogiri
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -85,6 +85,7 @@ files:
85
85
  - lib/cbeta/gaiji.rb
86
86
  - lib/cbeta/html_to_pdf.rb
87
87
  - lib/cbeta/html_to_text.rb
88
+ - lib/cbeta/p5a_checker.rb
88
89
  - lib/cbeta/p5a_to_html.rb
89
90
  - lib/cbeta/p5a_to_html_for_every_edition.rb
90
91
  - lib/cbeta/p5a_to_html_for_pdf.rb