cbeta 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cbeta.rb +1 -0
- data/lib/cbeta/p5a_validator.rb +107 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f0859bf502fdcd9d8180a1b3063527ab0606f567
|
4
|
+
data.tar.gz: 7d0dd6abfc13ba8b8645417506b39565d83bd685
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fecc80486dbea8477a22357e504ccfeb5c349da1108108fcef830c40327e5a94f08b223baaed2a5b82eff0bdc807bea9219bcf18988302b867b77bcc3906a90b
|
7
|
+
data.tar.gz: 156b9c7832a5d804d1d803848dd8df011981eb5ec2f836af1186aed581f42bb10e3b207e842adfa773c2ddfb6bd74c659607f867a8fd3cef6ddb3cc9f0defb24
|
data/lib/cbeta/p5a_validator.rb
CHANGED
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
# Checks whether XML files follow the CBETA xml-p5a editing conventions.
#
# A file is reported when it is not well-formed, when it does not validate
# against the RelaxNG schema given to the constructor, or when it fails the
# additional plain-text checks.
#
# @example
#   require 'cbeta'
#
#   RNG = '/Users/ray/Documents/Projects/cbeta/schema/cbeta-p5a.rng'
#   XML = '/Users/ray/Dropbox/DILA-CBETA/目次跨卷/xml/完成'
#
#   v = CBETA::P5aValidator.new(RNG)
#   s = v.check(XML)
#
#   if s.empty?
#     puts "Check passed, no errors found."
#   else
#     File.write('check.log', s)
#     puts "Errors found, see check.log."
#   end
class CBETA::P5aValidator
  # Separator line written after each file's entry in a folder report.
  SEP = '-' * 20

  private_constant :SEP

  # @param schema [String] RelaxNG schema file path
  def initialize(schema)
    @schema = schema
  end

  # Checks one XML file, or every file below a folder.
  #
  # @param xml_path [String] path of a CBETA XML P5a file or folder
  # @return [String] empty string when no error was found, otherwise the
  #   error messages
  def check(xml_path)
    Dir.exist?(xml_path) ? check_folder(xml_path) : check_file(xml_path)
  end

  private

  # Checks every non-hidden entry of +folder+ and concatenates the reports of
  # the files that failed.
  #
  # @param folder [String] folder path
  # @return [String] one "path, message, separator" entry per failing file
  def check_folder(folder)
    report = []
    # Dir.entries returns names in filesystem order; sort so the report is
    # stable between runs and platforms.
    Dir.entries(folder).sort.each do |name|
      next if name.start_with? '.'

      path = File.join(folder, name)
      if File.directory?(path)
        # A directory cannot be read as a file; descend into it instead.
        # The nested report already carries its own paths and separators.
        report << check_folder(path)
        next
      end

      message = check_file(path)
      report << "#{path}\n#{message}\n#{SEP}\n" unless message.empty?
    end
    report.join
  end

  # Runs all checks on one file, stopping at the first check that fails.
  #
  # @param fn [String] XML file path
  # @return [String] empty string when the file passes every check
  def check_file(fn)
    puts "check #{fn}"
    @xml_fn = fn # not read anywhere in this class; kept from the original
    # File.read closes the handle even when reading raises.
    xml = File.read(fn)

    r = check_well_form(xml)
    return "not well-form\n#{r}" unless r.empty?

    r = validate(xml)
    return "not valid\n#{r}" unless r.empty?

    check_text(xml)
  end

  # Plain-text checks that the schema cannot express.
  #
  # @param text [String] raw XML source
  # @return [String] empty string, or a message when a space precedes an
  #   <lb tag
  def check_text(text)
    text.include?(' <lb') ? 'lb 前不應有空格' : ''
  end

  # Parses +xml+ in strict mode to find out whether it is well-formed.
  #
  # @param xml [String] raw XML source
  # @return [String] empty string, or the parser's error description
  def check_well_form(xml)
    # Only the possible exception matters here; the parsed document is
    # discarded.
    Nokogiri::XML(xml) { |config| config.strict }
    ''
  rescue Nokogiri::XML::SyntaxError => e
    "caught exception: #{e}"
  end

  # Validates +xml+ against the RelaxNG schema.
  #
  # @param xml [String] raw XML source
  # @return [String] one line per validation error, empty when valid
  def validate(xml)
    relaxng.validate(Nokogiri::XML(xml)).map { |error| "#{error.message}\n" }.join
  end

  # Loads the RelaxNG schema on first use and reuses it for later files, so a
  # folder check does not re-parse the schema for every file. The block form
  # of File.open closes the schema file once it has been parsed.
  #
  # @return [Nokogiri::XML::RelaxNG] the parsed schema
  def relaxng
    @relaxng ||= File.open(@schema) { |f| Nokogiri::XML::RelaxNG(f) }
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-09-
|
11
|
+
date: 2015-09-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
|
14
14
|
email: zhoubx@gmail.com
|
@@ -24,6 +24,7 @@ files:
|
|
24
24
|
- lib/cbeta/p5a_to_html.rb
|
25
25
|
- lib/cbeta/p5a_to_simple_html.rb
|
26
26
|
- lib/cbeta/p5a_to_text.rb
|
27
|
+
- lib/cbeta/p5a_validator.rb
|
27
28
|
- lib/data/canons.csv
|
28
29
|
- lib/data/epub-nav.xhtml
|
29
30
|
- lib/data/epub.css
|