schematron-nokogiri 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,83 @@
1
+ ISO SCHEMATRON 2010
2
+
3
+ XSLT implementation by Rick Jelliffe with assistance from members of Schematron-love-in maillist.
4
+
5
+ 2010-01-25
6
+
7
+ Two distributions are available. One is for XSLT1 engines.
8
+ The other is for XSLT2 engines, such as SAXON 9.
9
+
10
+
11
+ This version of Schematron splits the process into a pipeline of several different XSLT stages.
12
+
13
+ 1) First, preprocess your Schematron schema with iso_dsdl_include.xsl.
14
+ This is a macro processor to assemble the schema from various parts.
15
+ If your schema is not in separate parts, you can skip this stage.
16
+
17
+ 2) Second, preprocess the output from stage 1 with iso_abstract_expand.xsl.
18
+ This is a macro processor to convert abstract patterns to real patterns.
19
+ If your schema does not use abstract patterns, you can skip this
20
+ stage.
21
+
22
+ 3) Third, compile the Schematron schema into an XSLT script.
23
+ This will typically use iso_svrl_for_xslt1.xsl or iso_svrl_for_xslt2.xsl
24
+ (which in turn invoke iso_schematron_skeleton_for_xslt1.xsl or iso_schematron_skeleton_for_saxon.xsl)
25
+ However, other "meta-stylesheets" are also in common use; the principle of operation is the same.
26
+ If your schema uses Schematron phases, supply these as command line/invocation parameters
27
+ to this process.
28
+
29
+ 4) Fourth, run the script generated by stage 3 against the document being validated.
30
+ If you are using the SVRL script, then the output of validation will be an XML document.
31
+ If your schema uses Schematron parameters, supply these as command line/invocation parameters
32
+ to this process.
33
+
34
+
35
+ The XSLT2 distribution also features several next generation features,
36
+ such as validating multiple documents. See the source code for details.
37
+
38
+ Schematron assertions can be written in any language, of course; the file
39
+ sch-messages-en.xhtml contains the diagnostics messages from the XSLT2 skeleton
40
+ in English, and this can be used as a template to localize the skeleton's
41
+ error messages. Note that typically programming errors in Schematron are XPath
42
+ errors, which require localized messages from the XSLT engine.
43
+
44
+ ANT
45
+ ---
46
+ To give an example of how to process a document, here is a sample ANT task.
47
+
48
+ <target name="schematron-compile-test" >
49
+
50
+ <!-- expand inclusions -->
51
+ <xslt basedir="test/schematron"
52
+ style="iso_dsdl_include.xsl" in="test.sch" out="test1.sch">
53
+ <classpath>
54
+ <pathelement location="${lib.dir}/saxon9.jar"/>
55
+ </classpath>
56
+ </xslt>
57
+
58
+ <!-- expand abstract patterns -->
59
+ <xslt basedir="test/schematron"
60
+ style="iso_abstract_expand.xsl" in="test1.sch" out="test2.sch">
61
+ <classpath>
62
+ <pathelement location="${lib.dir}/saxon9.jar"/>
63
+ </classpath>
64
+ </xslt>
65
+
66
+
67
+
68
+ <!-- compile it -->
69
+ <xslt basedir="test/schematron"
70
+ style="iso_svrl_for_xslt2.xsl" in="test2.sch" out="test.xsl">
71
+ <classpath>
72
+ <pathelement location="${lib.dir}/saxon9.jar"/>
73
+ </classpath>
74
+ </xslt>
75
+
76
+ <!-- validate -->
77
+ <xslt basedir="test/schematron"
78
+ style="test.xsl" in="instance.xml" out="instance.svrlt">
79
+ <classpath>
80
+ <pathelement location="${lib.dir}/saxon9.jar"/>
81
+ </classpath>
82
+ </xslt>
83
+ </target>
@@ -0,0 +1,85 @@
1
+ require 'nokogiri'
2
+
3
module SchematronNokogiri

  # Compiles a Schematron schema into an XSLT validator (via the bundled ISO
  # stylesheets) and runs that validator against instance documents.
  class Schema

    # The directory where the ISO schematron implementation lives
    ISO_IMPL_DIR = File.join File.dirname(__FILE__), "..", 'iso-schematron-xslt1'

    # The file names of the compilation stages, in the order they are applied
    ISO_FILES = ['iso_dsdl_include.xsl',
                 'iso_abstract_expand.xsl',
                 'iso_svrl_for_xslt1.xsl'].freeze

    # Namespace prefix declarations for use in XPaths
    NS_PREFIXES = {
      'svrl' => 'http://purl.oclc.org/dsdl/svrl'
    }.freeze

    # Build the validator stylesheet for a schema.
    #
    # doc - the parsed Schematron schema document; it is fed through each
    #       stage in ISO_FILES and the final output becomes the validator.
    def initialize(doc)
      # The stage files are opened by bare name, so work from ISO_IMPL_DIR.
      # NOTE(review): presumably this also lets the stylesheets resolve their
      # own relative imports -- confirm before replacing chdir with full paths.
      xforms = Dir.chdir(ISO_IMPL_DIR) do
        ISO_FILES.map do |file|
          # Block form closes the file handle once the stylesheet is parsed
          File.open(file) { |io| Nokogiri::XSLT(io) }
        end
      end

      # Compile schematron into xsl that maps to svrl
      validator_doc = xforms.inject(doc) { |xml, xsl| xsl.transform xml }
      @validator_xsl = Nokogiri::XSLT(validator_doc.to_s)
    end

    # Validate an instance document against the compiled schema.
    #
    # instance_doc - the parsed XML document to validate
    #
    # Returns an Array of Hashes (see #rule_hits): failed assertions first,
    # followed by successful reports. Empty when nothing was flagged.
    def validate(instance_doc)
      # Validate the xml
      results_doc = @validator_xsl.transform instance_doc

      # Collect the failed assertions and the successful reports
      rule_hits(results_doc, instance_doc, 'assert', '//svrl:failed-assert') +
        rule_hits(results_doc, instance_doc, 'report', '//svrl:successful-report')
    end

    # Look for reported or failed rules of a particular type in the results.
    #
    # results_doc  - the document produced by the validator stylesheet
    # instance_doc - the document that was validated
    # rule_type    - label stored under :rule_type ('assert' or 'report')
    # xpath        - XPath selecting the hit elements in results_doc
    #
    # Returns an Array with one Hash per message of every hit, with the keys
    # :rule_type, :type, :name, :line and :message. When a hit's location
    # cannot be resolved in instance_doc the message is still returned, with
    # :type, :name and :line set to nil, so that no finding is lost.
    def rule_hits(results_doc, instance_doc, rule_type, xpath)
      results_doc.root.xpath(xpath, NS_PREFIXES).flat_map do |hit|
        # The hit's location attribute is an XPath into the instance document
        context = instance_doc.root.xpath(hit['location'], NS_PREFIXES).first

        hit.xpath('svrl:text/text()', NS_PREFIXES).map do |message|
          {
            :rule_type => rule_type,
            :type      => context && node_type(context),
            :name      => context && context.name,
            :line      => context && context.line,
            :message   => message.content.strip
          }
        end
      end
    end

    # Name the kind of node that a rule fired on.
    #
    # Returns 'cdata', 'comment', 'element' or 'fragment'; nil for any other
    # kind of node.
    def node_type(node)
      if node.cdata?
        'cdata'
      elsif node.comment?
        'comment'
      elsif node.element?
        'element'
      elsif node.fragment?
        'fragment'
      end
    end

  end
end
85
+
@@ -0,0 +1,17 @@
1
# Packaging metadata for the schematron-nokogiri gem.
Gem::Specification.new do |gem|
  # Identity
  gem.name        = "schematron-nokogiri"
  # Alternative left disabled by the author: SemVer.find.format '%M.%m.%p'
  gem.version     = "0.0.1"
  gem.summary     = "ISO Schematron Validation using Nokogiri"
  gem.description = "Using this gem you can validate an XML document using a ISO Schematron validation file"

  # People and licensing
  gem.authors  = ["Francesco Lazzarino", "Alexandru Szasz"]
  gem.email    = "alexxed@gmail.com"
  gem.homepage = 'https://github.com/alexxed/schematron'
  gem.licenses = ["MIT"]

  # Command-line entry point
  gem.executables << 'stron-nokogiri'

  # Packaged files: fixed top-level files first, then the library sources,
  # the specs and the bundled ISO stylesheets, in that order.
  packaged_globs = ['lib/*.rb', 'spec/**/*', 'iso-schematron-xslt1/*']
  gem.files = ["schematron-nokogiri.gemspec", "README.md", "LICENSE.txt", '.semver'] +
              packaged_globs.flat_map { |glob| Dir[glob] }

  # Runtime dependency
  gem.add_dependency 'nokogiri', '~> 1.6'
end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+ require 'schematron-nokogiri'
3
+
4
describe "validate executable" do

  # Schema shared by the examples that run a full validation
  let(:schema) { 'spec/schema/fda_sip.sch' }

  # Run the command-line validator with the given arguments and return
  # whatever it wrote to standard output.
  def stron(*args)
    `ruby -Ilib bin/stron-nokogiri #{args.join(' ')}`
  end

  # With a single argument the tool is expected to print its usage text
  it "should take only a schema and an instance document" do
    stron('theschema').should =~ /Usage: /
  end

  # No output is expected for this instance
  it "should validate a good instance doc" do
    stron(schema, 'spec/instances/daitss-sip/Example1.xml').should be_empty
  end

  # Findings are expected on standard out, one per line, starting with
  # the node type, node name and line number
  it "should print errors to standard out" do
    output = stron(schema, 'spec/instances/daitss-sip/Example2.xml')
    output.should =~ /^element "file" on line 48/
  end

end
@@ -0,0 +1,5 @@
1
describe "feature requests" do
  # Placeholder example: it has no body yet.
  # If current() is not implemented then look into implementing
  # extensions to libxslt http://xmlsoft.org/XSLT/extensions.html
  it "should work with http://www.zvon.org/xxl/XMLSchemaTutorial/Output/ser_keys_st0.html examples"
end
@@ -0,0 +1,37 @@
1
+ <mets xmlns="http://www.loc.gov/METS/"
2
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
+ xmlns:xlink="http://www.w3.org/1999/xlink"
4
+ xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd
5
+ http://www.fcla.edu/dls/md/daitss/ http://www.fcla.edu/dls/md/daitss/daitss.xsd">
6
+ <amdSec>
7
+
8
+ <!-- required agreement info -->
9
+ <digiprovMD ID="digiprov-1">
10
+ <mdWrap MDTYPE="OTHER" OTHERMDTYPE="DATISS">
11
+ <xmlData>
12
+ <daitss xmlns="http://www.fcla.edu/dls/md/daitss/">
13
+ <AGREEMENT_INFO ACCOUNT="YOUR_ACCOUNT" PROJECT="THIS_PROJECT"/>
14
+ </daitss>
15
+ </xmlData>
16
+ </mdWrap>
17
+ </digiprovMD>
18
+
19
+ </amdSec>
20
+
21
+ <!-- must have checksum, checksum type & file location -->
22
+ <fileSec>
23
+ <fileGrp>
24
+ <file ID="file-1" xCHECKSUM="5ddb5736a014619bbbb3684bc6ae1613" CHECKSUMTYPE="MD5">
25
+ <FLocat LOCTYPE="URL" xlink:href="content_file"/>
26
+ </file>
27
+ </fileGrp>
28
+ </fileSec>
29
+
30
+ <!-- struct map can be fashioned in any way valid to mets -->
31
+ <structMap>
32
+ <div>
33
+ <fptr FILEID="file-1"/>
34
+ </div>
35
+ </structMap>
36
+
37
+ </mets>
@@ -0,0 +1,68 @@
1
+ <mets xmlns="http://www.loc.gov/METS/"
2
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
+ xmlns:xlink="http://www.w3.org/1999/xlink"
4
+ xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd
5
+ http://www.fcla.edu/dls/md/daitss/ http://www.fcla.edu/dls/md/daitss/daitss.xsd">
6
+ <amdSec>
7
+
8
+ <!-- required agreement info -->
9
+ <digiprovMD ID="digiprov-1">
10
+ <mdWrap MDTYPE="OTHER" OTHERMDTYPE="DATISS">
11
+ <xmlData>
12
+ <daitss xmlns="http://www.fcla.edu/dls/md/daitss/">
13
+ <AGREEMENT_INFO ACCOUNT="YOUR_ACCOUNT" PROJECT="THIS_PROJECT"/>
14
+ </daitss>
15
+ </xmlData>
16
+ </mdWrap>
17
+ </digiprovMD>
18
+
19
+ </amdSec>
20
+
21
+ <!-- must have checksum, checksum type & file location -->
22
+ <fileSec>
23
+ <fileGrp>
24
+ <file ID="file-1" CHECKSUM="259c4fabf48fd8fa664fb7123e470624" CHECKSUMTYPE="MD5">
25
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_1"/>
26
+ </file>
27
+
28
+ <file ID="file-2" CHECKSUM="6913536569c62bb11a4bb453e0553a56" CHECKSUMTYPE="MD5">
29
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_2"/>
30
+ </file>
31
+
32
+ <file ID="file-3" CHECKSUM="5ddb5736a014619bbbb3684bc6ae1613" CHECKSUMTYPE="MD5">
33
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_3"/>
34
+ </file>
35
+
36
+ <file ID="file-4" CHECKSUM="1bd851567835a6b6ce0b78502f1d0abe" CHECKSUMTYPE="MD5">
37
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_4"/>
38
+ </file>
39
+
40
+ <file ID="file-5" CHECKSUM="545976801e805db1c77cb9f4fa55c051" CHECKSUMTYPE="MD5">
41
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_5"/>
42
+ </file>
43
+
44
+ <file ID="file-6">
45
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_6"/>
46
+ </file>
47
+
48
+ <file ID="file-7" CHECKSUM="bfb0cf6095cd5ab3837119b11d3f285500000000z" CHECKSUMTYPE="SHA-1">
49
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_7"/>
50
+ </file>
51
+
52
+ </fileGrp>
53
+ </fileSec>
54
+
55
+ <!-- struct map can be fashioned in any way valid to mets -->
56
+ <structMap>
57
+ <div>
58
+ <fptr FILEID="file-1"/>
59
+ <fptr FILEID="file-2"/>
60
+ <fptr FILEID="file-3"/>
61
+ <fptr FILEID="file-4"/>
62
+ <fptr FILEID="file-5"/>
63
+ <fptr FILEID="file-6"/>
64
+ <fptr FILEID="file-7"/>
65
+ </div>
66
+ </structMap>
67
+
68
+ </mets>
@@ -0,0 +1,102 @@
1
+ <!-- Example showing PREMIS in METS -->
2
+
3
+ <mets xmlns="http://www.loc.gov/METS/"
4
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5
+ xmlns:xlink="http://www.w3.org/1999/xlink"
6
+ xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd
7
+ http://www.fcla.edu/dls/md/daitss/ http://www.fcla.edu/dls/md/daitss/daitss.xsd">
8
+ <amdSec>
9
+
10
+
11
+ <!-- WRONG: digiprovMD/object -->
12
+ <digiprovMD ID="digiprov-1">
13
+ <mdWrap MDTYPE="PREMIS">
14
+ <xmlData>
15
+
16
+ <object xmlns="info:lc/xmlns/premis-v2" xsi:type="file">
17
+ <objectIdentifier>
18
+ <objectIdentifierType>DAITSS2</objectIdentifierType>
19
+ <objectIdentifierValue>daitss://archive.fcla.edu/p926/f0</objectIdentifierValue>
20
+ </objectIdentifier>
21
+ <objectCharacteristics>
22
+ <compositionLevel>0</compositionLevel>
23
+ <fixity>
24
+ <messageDigestAlgorithm>MD5</messageDigestAlgorithm>
25
+ <messageDigest>2de9ef79df730f93e40819625cf7bcb2</messageDigest>
26
+ </fixity>
27
+ <size>3001452</size>
28
+ <format>
29
+ <formatDesignation>
30
+ <formatName>TIFF</formatName>
31
+ <formatVersion>4.0</formatVersion>
32
+ </formatDesignation>
33
+ <formatRegistry>
34
+ <formatRegistryName>http://www.nationalarchives.gov.uk/pronom</formatRegistryName>
35
+ <formatRegistryKey>fmt/8</formatRegistryKey>
36
+ </formatRegistry>
37
+ </format>
38
+ </objectCharacteristics>
39
+ <storage>
40
+ <contentLocation>
41
+ <contentLocationType>URL</contentLocationType>
42
+ <contentLocationValue>http://archive.fcla.edu:5000/silos/gainesville-1/p926</contentLocationValue>
43
+ </contentLocation>
44
+ <storageMedium>Hard disk</storageMedium>
45
+ </storage>
46
+ </object>
47
+
48
+ </xmlData>
49
+ </mdWrap>
50
+ </digiprovMD>
51
+
52
+ <!-- WRONG techMD/event -->
53
+ <techMD ID="tech-1">
54
+ <mdWrap MDTYPE="PREMIS">
55
+ <xmlData>
56
+
57
+ <event xmlns="info:lc/xmlns/premis-v2">
58
+ <eventIdentifier>
59
+ <eventIdentifierType>DAITSS</eventIdentifierType>
60
+ <eventIdentifierValue>1</eventIdentifierValue>
61
+ </eventIdentifier>
62
+ <eventType>Format Validation</eventType>
63
+ <eventDateTime>2008-05-06T10:40:22-04:00</eventDateTime>
64
+ <eventOutcomeInformation>
65
+ <eventOutcome>Invalid</eventOutcome>
66
+ <eventOutcomeDetail>
67
+ <eventOutcomeDetailExtension>
68
+ <anomaly>ill-formed DateTime value</anomaly>
69
+ </eventOutcomeDetailExtension>
70
+ </eventOutcomeDetail>
71
+ </eventOutcomeInformation>
72
+ <linkingAgentIdentifier>
73
+ <linkingAgentIdentifierType>URI</linkingAgentIdentifierType>
74
+ <linkingAgentIdentifierValue>http://archive.fcla.edu/format-description</linkingAgentIdentifierValue>
75
+ </linkingAgentIdentifier>
76
+ <linkingObjectIdentifier>
77
+ <linkingObjectIdentifierType>DAITSS2</linkingObjectIdentifierType>
78
+ <linkingObjectIdentifierValue>daitss://archive.fcla.edu/p926/f0</linkingObjectIdentifierValue>
79
+ </linkingObjectIdentifier>
80
+ </event>
81
+
82
+ </xmlData>
83
+ </mdWrap>
84
+ </techMD>
85
+
86
+ </amdSec>
87
+
88
+ <fileSec>
89
+ <fileGrp>
90
+ <file ID="file-1" CHECKSUM="5ddb5736a014619bbbb3684bc6ae1613" CHECKSUMTYPE="MD5">
91
+ <FLocat LOCTYPE="URL" xlink:href="content_file"/>
92
+ </file>
93
+ </fileGrp>
94
+ </fileSec>
95
+
96
+ <structMap>
97
+ <div>
98
+ <fptr FILEID="file-1"/>
99
+ </div>
100
+ </structMap>
101
+
102
+ </mets>
@@ -0,0 +1,103 @@
1
+ <!-- Example showing PREMIS in METS -->
2
+
3
+ <mets xmlns="http://www.loc.gov/METS/"
4
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5
+ xmlns:xlink="http://www.w3.org/1999/xlink"
6
+ xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd
7
+ http://www.fcla.edu/dls/md/daitss/ http://www.fcla.edu/dls/md/daitss/daitss.xsd">
8
+ <amdSec>
9
+
10
+
11
+ <!-- techMD/object -->
12
+ <techMD ID="tech-1">
13
+ <mdWrap MDTYPE="PREMIS">
14
+ <xmlData>
15
+
16
+ <object xmlns="info:lc/xmlns/premis-v2" xsi:type="file">
17
+ <objectIdentifier>
18
+ <objectIdentifierType>DAITSS2</objectIdentifierType>
19
+ <objectIdentifierValue>daitss://archive.fcla.edu/p926/f0</objectIdentifierValue>
20
+ </objectIdentifier>
21
+ <objectCharacteristics>
22
+ <compositionLevel>0</compositionLevel>
23
+ <fixity>
24
+ <messageDigestAlgorithm>MD5</messageDigestAlgorithm>
25
+ <messageDigest>2de9ef79df730f93e40819625cf7bcb2</messageDigest>
26
+ </fixity>
27
+ <size>3001452</size>
28
+ <format>
29
+ <formatDesignation>
30
+ <formatName>TIFF</formatName>
31
+ <formatVersion>4.0</formatVersion>
32
+ </formatDesignation>
33
+ <formatRegistry>
34
+ <formatRegistryName>http://www.nationalarchives.gov.uk/pronom</formatRegistryName>
35
+ <formatRegistryKey>fmt/8</formatRegistryKey>
36
+ </formatRegistry>
37
+ </format>
38
+ </objectCharacteristics>
39
+ <storage>
40
+ <contentLocation>
41
+ <contentLocationType>URL</contentLocationType>
42
+ <contentLocationValue>http://archive.fcla.edu:5000/silos/gainesville-1/p926</contentLocationValue>
43
+ </contentLocation>
44
+ <storageMedium>Hard disk</storageMedium>
45
+ </storage>
46
+ </object>
47
+
48
+ </xmlData>
49
+ </mdWrap>
50
+ </techMD>
51
+
52
+
53
+ <!-- digiprovMD/event -->
54
+ <digiprovMD ID="digiprov-1">
55
+ <mdWrap MDTYPE="PREMIS">
56
+ <xmlData>
57
+
58
+ <event xmlns="info:lc/xmlns/premis-v2">
59
+ <eventIdentifier>
60
+ <eventIdentifierType>DAITSS</eventIdentifierType>
61
+ <eventIdentifierValue>1</eventIdentifierValue>
62
+ </eventIdentifier>
63
+ <eventType>Format Validation</eventType>
64
+ <eventDateTime>2008-05-06T10:40:22-04:00</eventDateTime>
65
+ <eventOutcomeInformation>
66
+ <eventOutcome>Invalid</eventOutcome>
67
+ <eventOutcomeDetail>
68
+ <eventOutcomeDetailExtension>
69
+ <anomaly>ill-formed DateTime value</anomaly>
70
+ </eventOutcomeDetailExtension>
71
+ </eventOutcomeDetail>
72
+ </eventOutcomeInformation>
73
+ <linkingAgentIdentifier>
74
+ <linkingAgentIdentifierType>URI</linkingAgentIdentifierType>
75
+ <linkingAgentIdentifierValue>http://archive.fcla.edu/format-description</linkingAgentIdentifierValue>
76
+ </linkingAgentIdentifier>
77
+ <linkingObjectIdentifier>
78
+ <linkingObjectIdentifierType>DAITSS2</linkingObjectIdentifierType>
79
+ <linkingObjectIdentifierValue>daitss://archive.fcla.edu/p926/f0</linkingObjectIdentifierValue>
80
+ </linkingObjectIdentifier>
81
+ </event>
82
+
83
+ </xmlData>
84
+ </mdWrap>
85
+ </digiprovMD>
86
+
87
+ </amdSec>
88
+
89
+ <fileSec>
90
+ <fileGrp>
91
+ <file ID="file-1" CHECKSUM="5ddb5736a014619bbbb3684bc6ae1613" CHECKSUMTYPE="MD5">
92
+ <FLocat LOCTYPE="URL" xlink:href="content_file"/>
93
+ </file>
94
+ </fileGrp>
95
+ </fileSec>
96
+
97
+ <structMap>
98
+ <div>
99
+ <fptr FILEID="file-1"/>
100
+ </div>
101
+ </structMap>
102
+
103
+ </mets>