cbeta 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 10aa2f66c254289322fef17cfdcab89ba8ee5adc
4
- data.tar.gz: d03ea607e5a176b6687e5bb13d1177d7013cc4ad
3
+ metadata.gz: f0859bf502fdcd9d8180a1b3063527ab0606f567
4
+ data.tar.gz: 7d0dd6abfc13ba8b8645417506b39565d83bd685
5
5
  SHA512:
6
- metadata.gz: fb00bf29bdd5b51d62b11c2072316c9e49b881b78570a7b39e35b5e90039d054510375fddcd56f1b015e8316a6332456a22ffa97042ff308a8d3fe62542afebd
7
- data.tar.gz: fbe3013efeb80c7da0261f7d48d83744230623bef8eed1477d38989d5fbc154db32babbc06e8702839e585884bfdda4fd800f0f719cc75b2f9b8ae08cb0e8da3
6
+ metadata.gz: fecc80486dbea8477a22357e504ccfeb5c349da1108108fcef830c40327e5a94f08b223baaed2a5b82eff0bdc807bea9219bcf18988302b867b77bcc3906a90b
7
+ data.tar.gz: 156b9c7832a5d804d1d803848dd8df011981eb5ec2f836af1186aed581f42bb10e3b207e842adfa773c2ddfb6bd74c659607f867a8fd3cef6ddb3cc9f0defb24
@@ -65,4 +65,5 @@ require 'cbeta/p5a_to_epub'
65
65
  require 'cbeta/p5a_to_html'
66
66
  require 'cbeta/p5a_to_simple_html'
67
67
  require 'cbeta/p5a_to_text'
68
+ require 'cbeta/p5a_validator'
68
69
  require 'cbeta/html_to_text'
@@ -0,0 +1,107 @@
1
+ require 'nokogiri'
2
+
3
# Checks whether XML files follow the CBETA xml-p5a editing conventions.
#
# Every file goes through three stages and stops at the first one that fails:
# well-formedness, RelaxNG validity against the schema given to the
# constructor, and a plain-text rule (no space directly before an <lb> tag).
#
# @example
#   require 'cbeta'
#
#   RNG = '/Users/ray/Documents/Projects/cbeta/schema/cbeta-p5a.rng'
#   XML = '/Users/ray/Dropbox/DILA-CBETA/目次跨卷/xml/完成'
#
#   v = CBETA::P5aValidator.new(RNG)
#   s = v.check(XML)
#
#   if s.empty?
#     puts "Check passed, no errors found."
#   else
#     File.write('check.log', s)
#     puts "Errors found, see check.log."
#   end
class CBETA::P5aValidator

  SEP = '-' * 20 # separator line written after each file's error report

  private_constant :SEP

  # @param schema [String] RelaxNG schema file path
  def initialize(schema)
    @schema = schema
  end

  # Checks a single XML file, or every non-hidden file below a folder.
  #
  # @param xml_path [String] path of a CBETA XML P5a file or of a folder containing such files
  # @return [String] empty string when no error was found, otherwise the error messages
  def check(xml_path)
    if Dir.exist? xml_path
      check_folder xml_path
    else
      check_file xml_path
    end
  end

  private

  # Checks every entry of +folder+ whose name does not start with a dot.
  # Sub-folders are walked recursively instead of being handed to
  # check_file (reading a directory as a file raises Errno::EISDIR).
  # Entries are sorted so the report order does not depend on the file system.
  #
  # @param folder [String] folder path
  # @return [String] for each failing file: its path, its messages and a SEP line
  def check_folder(folder)
    names = Dir.entries(folder).reject { |name| name.start_with? '.' }
    reports = names.sort.map do |name|
      path = File.join(folder, name)
      next check_folder(path) if File.directory? path

      errors = check_file(path)
      errors.empty? ? '' : "#{path}\n#{errors}\n#{SEP}\n"
    end
    reports.join
  end

  # Runs the three checks on one file and returns the first failure.
  # Prints a progress line to standard output for each file.
  #
  # @param fn [String] XML file path
  # @return [String] empty string when the file passes every check
  def check_file(fn)
    puts "check #{fn}"
    @xml_fn = fn # file currently being checked; nothing in this class reads it back
    xml = File.read(fn) # File.read closes the handle even when reading fails

    r = check_well_form(xml)
    return "not well-form\n#{r}" unless r.empty?

    r = validate(xml)
    return "not valid\n#{r}" unless r.empty?

    check_text(xml)
  end

  # Plain-text rule: an <lb> tag must not be preceded by a space.
  #
  # @param text [String] raw XML text
  # @return [String] the error message, or an empty string
  def check_text(text)
    text.include?(' <lb') ? 'lb 前不應有空格' : ''
  end

  # Parses +xml+ in strict mode so that any syntax problem raises.
  #
  # @param xml [String] raw XML text
  # @return [String] empty string when well-formed, otherwise the parser's message
  def check_well_form(xml)
    Nokogiri::XML(xml) { |config| config.strict }
    ''
  rescue Nokogiri::XML::SyntaxError => e
    "caught exception: #{e}"
  end

  # Validates +xml+ against the RelaxNG schema.
  #
  # @param xml [String] raw XML text
  # @return [String] one validation message per line, empty when valid
  def validate(xml)
    errors = relaxng.validate(Nokogiri::XML(xml))
    errors.map { |error| "#{error.message}\n" }.join
  end

  # Loads the schema once and reuses it for every file. The schema is handed
  # to Nokogiri as an open File (not a String) and the block form closes the
  # handle as soon as parsing is done.
  #
  # @return [Nokogiri::XML::RelaxNG]
  def relaxng
    @relaxng ||= File.open(@schema) { |f| Nokogiri::XML::RelaxNG(f) }
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-01 00:00:00.000000000 Z
11
+ date: 2015-09-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -24,6 +24,7 @@ files:
24
24
  - lib/cbeta/p5a_to_html.rb
25
25
  - lib/cbeta/p5a_to_simple_html.rb
26
26
  - lib/cbeta/p5a_to_text.rb
27
+ - lib/cbeta/p5a_validator.rb
27
28
  - lib/data/canons.csv
28
29
  - lib/data/epub-nav.xhtml
29
30
  - lib/data/epub.css