cbeta 0.5.3 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cbeta.rb +1 -0
- data/lib/cbeta/p5a_validator.rb +107 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f0859bf502fdcd9d8180a1b3063527ab0606f567
|
4
|
+
data.tar.gz: 7d0dd6abfc13ba8b8645417506b39565d83bd685
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fecc80486dbea8477a22357e504ccfeb5c349da1108108fcef830c40327e5a94f08b223baaed2a5b82eff0bdc807bea9219bcf18988302b867b77bcc3906a90b
|
7
|
+
data.tar.gz: 156b9c7832a5d804d1d803848dd8df011981eb5ec2f836af1186aed581f42bb10e3b207e842adfa773c2ddfb6bd74c659607f867a8fd3cef6ddb3cc9f0defb24
|
data/lib/cbeta/p5a_validator.rb
CHANGED
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
# Checks whether XML files conform to the CBETA xml-p5a editing conventions.
#
# Each file goes through three checks, stopping at the first that fails:
# strict well-formedness parsing, RelaxNG schema validation, and a plain-text
# style check.
#
# @example
#   require 'cbeta'
#
#   RNG = '/Users/ray/Documents/Projects/cbeta/schema/cbeta-p5a.rng'
#   XML = '/Users/ray/Dropbox/DILA-CBETA/目次跨卷/xml/完成'
#
#   v = CBETA::P5aValidator.new(RNG)
#   s = v.check(XML)
#
#   if s.empty?
#     puts 'Check passed, no errors found.'
#   else
#     File.write('check.log', s)
#     puts 'Errors found, see check.log.'
#   end
class CBETA::P5aValidator
  # Separator written after each file's error report.
  SEP = '-' * 20
  private_constant :SEP

  # @param schema [String] RelaxNG schema file path
  def initialize(schema)
    @schema = schema
  end

  # Checks a single XML file, or every file under a folder.
  #
  # @param xml_path [String] path of a CBETA XML P5a file or of a folder
  # @return [String] empty string when no error is found, otherwise the
  #   error messages
  def check(xml_path)
    Dir.exist?(xml_path) ? check_folder(xml_path) : check_file(xml_path)
  end

  private

  # Checks every non-hidden entry of +folder+, descending into
  # sub-directories, and concatenates the per-file reports.
  #
  # Entries are visited in sorted order so the report is reproducible
  # regardless of the order the filesystem returns them in.
  #
  # @param folder [String] directory path
  # @return [String] concatenated reports; empty when nothing failed
  def check_folder(folder)
    reports = Dir.entries(folder).sort.map do |name|
      # Skips '.', '..' and hidden entries.
      next '' if name.start_with?('.')

      path = File.join(folder, name)
      # A nested report already carries its own paths and separators.
      next check_folder(path) if Dir.exist?(path)

      errors = check_file(path)
      errors.empty? ? '' : "#{path}\n#{errors}\n#{SEP}\n"
    end
    reports.join
  end

  # Runs all checks on one file and returns the first failure found.
  #
  # @param fn [String] XML file path
  # @return [String] empty string when the file passes every check
  def check_file(fn)
    puts "check #{fn}"
    @xml_fn = fn
    # File.read closes the handle even when reading raises.
    xml = File.read(fn)

    problems = check_well_form(xml)
    return "not well-form\n#{problems}" unless problems.empty?

    problems = validate(xml)
    return "not valid\n#{problems}" unless problems.empty?

    check_text(xml)
  end

  # Plain-text style check: reports a space directly before an <lb tag.
  #
  # @param text [String] raw XML source
  # @return [String] error message, or empty string
  def check_text(text)
    text.include?(' <lb') ? 'lb 前不應有空格' : ''
  end

  # Parses +xml+ in strict mode to detect well-formedness errors.
  #
  # @param xml [String] raw XML source
  # @return [String] description of the syntax error, or empty string
  def check_well_form(xml)
    Nokogiri::XML(xml) { |config| config.strict }
    ''
  rescue Nokogiri::XML::SyntaxError => e
    "caught exception: #{e}"
  end

  # Validates +xml+ against the RelaxNG schema.
  #
  # @param xml [String] raw XML source
  # @return [String] one validation message per line, or empty string
  def validate(xml)
    errors = relaxng.validate(Nokogiri::XML(xml))
    errors.map { |error| "#{error.message}\n" }.join
  end

  # Loads the RelaxNG schema once and reuses it for every file.
  #
  # The schema is handed to Nokogiri as an open File rather than a String,
  # as the original code did, and the block form closes the handle as soon
  # as parsing is done.
  #
  # @return [Nokogiri::XML::RelaxNG]
  def relaxng
    @relaxng ||= File.open(@schema) { |io| Nokogiri::XML::RelaxNG(io) }
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-09-
|
11
|
+
date: 2015-09-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
|
14
14
|
email: zhoubx@gmail.com
|
@@ -24,6 +24,7 @@ files:
|
|
24
24
|
- lib/cbeta/p5a_to_html.rb
|
25
25
|
- lib/cbeta/p5a_to_simple_html.rb
|
26
26
|
- lib/cbeta/p5a_to_text.rb
|
27
|
+
- lib/cbeta/p5a_validator.rb
|
27
28
|
- lib/data/canons.csv
|
28
29
|
- lib/data/epub-nav.xhtml
|
29
30
|
- lib/data/epub.css
|