cbeta 0.5.3 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 10aa2f66c254289322fef17cfdcab89ba8ee5adc
4
- data.tar.gz: d03ea607e5a176b6687e5bb13d1177d7013cc4ad
3
+ metadata.gz: f0859bf502fdcd9d8180a1b3063527ab0606f567
4
+ data.tar.gz: 7d0dd6abfc13ba8b8645417506b39565d83bd685
5
5
  SHA512:
6
- metadata.gz: fb00bf29bdd5b51d62b11c2072316c9e49b881b78570a7b39e35b5e90039d054510375fddcd56f1b015e8316a6332456a22ffa97042ff308a8d3fe62542afebd
7
- data.tar.gz: fbe3013efeb80c7da0261f7d48d83744230623bef8eed1477d38989d5fbc154db32babbc06e8702839e585884bfdda4fd800f0f719cc75b2f9b8ae08cb0e8da3
6
+ metadata.gz: fecc80486dbea8477a22357e504ccfeb5c349da1108108fcef830c40327e5a94f08b223baaed2a5b82eff0bdc807bea9219bcf18988302b867b77bcc3906a90b
7
+ data.tar.gz: 156b9c7832a5d804d1d803848dd8df011981eb5ec2f836af1186aed581f42bb10e3b207e842adfa773c2ddfb6bd74c659607f867a8fd3cef6ddb3cc9f0defb24
@@ -65,4 +65,5 @@ require 'cbeta/p5a_to_epub'
65
65
  require 'cbeta/p5a_to_html'
66
66
  require 'cbeta/p5a_to_simple_html'
67
67
  require 'cbeta/p5a_to_text'
68
+ require 'cbeta/p5a_validator'
68
69
  require 'cbeta/html_to_text'
@@ -0,0 +1,107 @@
1
+ require 'nokogiri'
2
+
3
# Checks whether XML files conform to the CBETA xml-p5a editorial conventions.
#
# Every file goes through three checks, stopping at the first one that fails:
# well-formedness, RelaxNG validation against the schema given to the
# constructor, and CBETA-specific text rules.
#
# @example
#   require 'cbeta'
#
#   RNG = '/Users/ray/Documents/Projects/cbeta/schema/cbeta-p5a.rng'
#   XML = '/Users/ray/Dropbox/DILA-CBETA/目次跨卷/xml/完成'
#
#   v = CBETA::P5aValidator.new(RNG)
#   s = v.check(XML)
#
#   if s.empty?
#     puts "Check passed, no errors found."
#   else
#     File.write('check.log', s)
#     puts "Errors found, see check.log."
#   end
class CBETA::P5aValidator

  # Separator line written after each per-file error report.
  SEP = ('-' * 20).freeze

  private_constant :SEP

  # @param schema [String] RelaxNG schema file path
  def initialize(schema)
    @schema = schema
  end

  # Checks a single XML file, or every entry below a folder.
  #
  # For a single file the bare error message is returned. For a folder each
  # failing file contributes a record made of its path, its error message and
  # a separator line.
  #
  # @param xml_path [String] path of a CBETA XML P5a file or folder
  # @return [String] empty string when no error was found, otherwise the
  #   error messages
  def check(xml_path)
    Dir.exist?(xml_path) ? check_folder(xml_path) : check_file(xml_path)
  end

  private

  # Checks every non-hidden entry of +folder+, descending into sub-folders.
  # Entries are visited in sorted order so the report is deterministic.
  #
  # @param folder [String] folder path
  # @return [String] concatenated error records, empty when all files pass
  def check_folder(folder)
    names = Dir.entries(folder).reject { |name| name.start_with?('.') }.sort
    names.map { |name| report_for(File.join(folder, name)) }.join
  end

  # Builds the report for one folder entry.
  #
  # @param path [String] path of a file or sub-folder
  # @return [String] error record(s) for +path+, empty when nothing is wrong
  def report_for(path)
    # A sub-folder cannot be read as a file; its own records are already
    # complete (path, message, separator), so they are passed through as is.
    return check_folder(path) if Dir.exist?(path)

    message = check_file(path)
    message.empty? ? '' : "#{path}\n#{message}\n#{SEP}\n"
  end

  # Runs all checks on one file and reports the first failure.
  # Prints a progress line to standard output before checking.
  #
  # @param fn [String] XML file path
  # @return [String] error message, empty when the file passes every check
  def check_file(fn)
    puts "check #{fn}"
    xml = File.read(fn) # File.read closes the handle even when reading fails

    r = check_well_form(xml)
    return "not well-form\n#{r}" unless r.empty?

    r = validate(xml)
    return "not valid\n#{r}" unless r.empty?

    check_text(xml)
  end

  # Text-level rule: a space must not directly precede an <lb element.
  #
  # @param text [String] raw XML source
  # @return [String] error message, empty when the rule holds
  def check_text(text)
    text.include?(' <lb') ? 'lb 前不應有空格' : ''
  end

  # Parses the source in strict mode so that syntax errors raise.
  #
  # @param xml [String] raw XML source
  # @return [String] description of the syntax error, empty when well-formed
  def check_well_form(xml)
    Nokogiri::XML(xml) { |config| config.strict }
    ''
  rescue Nokogiri::XML::SyntaxError => e
    "caught exception: #{e}"
  end

  # Validates the source against the RelaxNG schema.
  #
  # @param xml [String] raw XML source
  # @return [String] one validation message per line, empty when valid
  def validate(xml)
    errors = relaxng.validate(Nokogiri::XML(xml))
    errors.map { |error| "#{error.message}\n" }.join
  end

  # Loads the schema on first use and reuses it for every later file.
  # The block form of File.open closes the schema file once it is parsed.
  #
  # @return [Nokogiri::XML::RelaxNG] the parsed schema
  def relaxng
    @relaxng ||= File.open(@schema) { |io| Nokogiri::XML::RelaxNG(io) }
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-01 00:00:00.000000000 Z
11
+ date: 2015-09-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -24,6 +24,7 @@ files:
24
24
  - lib/cbeta/p5a_to_html.rb
25
25
  - lib/cbeta/p5a_to_simple_html.rb
26
26
  - lib/cbeta/p5a_to_text.rb
27
+ - lib/cbeta/p5a_validator.rb
27
28
  - lib/data/canons.csv
28
29
  - lib/data/epub-nav.xhtml
29
30
  - lib/data/epub.css