schematron-nokogiri 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
1
+ ISO SCHEMATRON 2010
2
+
3
+ XSLT implementation by Rick Jelliffe with assistance from members of Schematron-love-in maillist.
4
+
5
+ 2010-01-25
6
+
7
+ Two distributions are available. One is for XSLT1 engines.
8
+ The other is for XSLT2 engines, such as SAXON 9.
9
+
10
+
11
+ This version of Schematron splits the process into a pipeline of several different XSLT stages.
12
+
13
+ 1) First, preprocess your Schematron schema with iso_dsdl_include.xsl.
14
+ This is a macro processor to assemble the schema from various parts.
15
+ If your schema is not in separate parts, you can skip this stage.
16
+
17
+ 2) Second, preprocess the output from stage 1 with iso_abstract_expand.xsl.
18
+ This is a macro processor to convert abstract patterns to real patterns.
19
+ If your schema does not use abstract patterns, you can skip this
20
+ stage.
21
+
22
+ 3) Third, compile the Schematron schema into an XSLT script.
23
+ This will typically use iso_svrl_for_xslt1.xsl or iso_svrl_for_xslt2.xsl
24
+ (which in turn invoke iso_schematron_skeleton_for_xslt1.xsl or iso_schematron_skeleton_for_saxon.xsl)
25
+ However, other "meta-stylesheets" are also in common use; the principle of operation is the same.
26
+ If your schema uses Schematron phases, supply these as command line/invocation parameters
27
+ to this process.
28
+
29
+ 4) Fourth, run the script generated by stage 3 against the document being validated.
30
+ If you are using the SVRL script, then the output of validation will be an XML document.
31
+ If your schema uses Schematron parameters, supply these as command line/invocation parameters
32
+ to this process.
33
+
34
+
35
+ The XSLT2 distribution also features several next generation features,
36
+ such as validating multiple documents. See the source code for details.
37
+
38
+ Schematron assertions can be written in any language, of course; the file
39
+ sch-messages-en.xhtml contains the diagnostics messages from the XSLT2 skeleton
40
+ in English, and this can be used as a template to localize the skeleton's
41
+ error messages. Note that programming errors in Schematron are typically XPath
42
+ errors, which require localized messages from the XSLT engine.
43
+
44
+ ANT
45
+ ---
46
+ To give an example of how to process a document, here is a sample ANT task.
47
+
48
+ <target name="schematron-compile-test" >
49
+
50
+ <!-- expand inclusions -->
51
+ <xslt basedir="test/schematron"
52
+ style="iso_dsdl_include.xsl" in="test.sch" out="test1.sch">
53
+ <classpath>
54
+ <pathelement location="${lib.dir}/saxon9.jar"/>
55
+ </classpath>
56
+ </xslt>
57
+
58
+ <!-- expand abstract patterns -->
59
+ <xslt basedir="test/schematron"
60
+ style="iso_abstract_expand.xsl" in="test1.sch" out="test2.sch">
61
+ <classpath>
62
+ <pathelement location="${lib.dir}/saxon9.jar"/>
63
+ </classpath>
64
+ </xslt>
65
+
66
+
67
+
68
+ <!-- compile it -->
69
+ <xslt basedir="test/schematron"
70
+ style="iso_svrl_for_xslt2.xsl" in="test2.sch" out="test.xsl">
71
+ <classpath>
72
+ <pathelement location="${lib.dir}/saxon9.jar"/>
73
+ </classpath>
74
+ </xslt>
75
+
76
+ <!-- validate -->
77
+ <xslt basedir="test/schematron"
78
+ style="test.xsl" in="instance.xml" out="instance.svrlt">
79
+ <classpath>
80
+ <pathelement location="${lib.dir}/saxon9.jar"/>
81
+ </classpath>
82
+ </xslt>
83
+ </target>
@@ -0,0 +1,85 @@
1
+ require 'nokogiri'
2
+
3
+ module SchematronNokogiri
4
+
5
+ class Schema
6
+
7
+ # The location of the ISO schematron implemtation lives
8
+ ISO_IMPL_DIR = File.join File.dirname(__FILE__), "..", 'iso-schematron-xslt1'
9
+
10
+ # The file names of the compilation stages
11
+ ISO_FILES = ['iso_dsdl_include.xsl',
12
+ 'iso_abstract_expand.xsl',
13
+ 'iso_svrl_for_xslt1.xsl']
14
+
15
+ # Namespace prefix declarations for use in XPaths
16
+ NS_PREFIXES = {
17
+ 'svrl' => 'http://purl.oclc.org/dsdl/svrl'
18
+ }
19
+
20
+ def initialize(doc)
21
+ schema_doc = doc
22
+
23
+ xforms = ISO_FILES.map do |file|
24
+
25
+ Dir.chdir(ISO_IMPL_DIR) do
26
+ Nokogiri::XSLT(File.open(file))
27
+ end
28
+
29
+ end
30
+
31
+ # Compile schematron into xsl that maps to svrl
32
+ validator_doc = xforms.inject(schema_doc) {
33
+ |xml, xsl| xsl.transform xml
34
+ }
35
+ @validator_xsl = Nokogiri::XSLT(validator_doc.to_s)
36
+ end
37
+
38
+ def validate(instance_doc)
39
+
40
+ # Validate the xml
41
+ results_doc = @validator_xsl.transform instance_doc
42
+
43
+ # compile the errors and log any messages
44
+ rule_hits(results_doc, instance_doc, 'assert', '//svrl:failed-assert') +
45
+ rule_hits(results_doc, instance_doc, 'report', '//svrl:successful-report')
46
+ end
47
+
48
+ # Look for reported or failed rules of a particular type in the instance doc
49
+ def rule_hits(results_doc, instance_doc, rule_type, xpath)
50
+
51
+ results = []
52
+
53
+ results_doc.root.xpath(xpath, NS_PREFIXES).each do |hit|
54
+ context = instance_doc.root.xpath(hit['location'], NS_PREFIXES).first
55
+
56
+ hit.xpath('svrl:text/text()', NS_PREFIXES).each do |message|
57
+ results << {
58
+ :rule_type => rule_type,
59
+ :type => node_type(context),
60
+ :name => context.name,
61
+ :line => context.line,
62
+ :message => message.content.strip}
63
+ end
64
+ end
65
+
66
+ results
67
+
68
+ end
69
+
70
+ def node_type(node)
71
+ case
72
+ when node.cdata?
73
+ 'cdata'
74
+ when node.comment?
75
+ 'comment'
76
+ when node.element?
77
+ 'element'
78
+ when node.fragment?
79
+ 'fragment'
80
+ end
81
+ end
82
+
83
+ end
84
+ end
85
+
@@ -0,0 +1,17 @@
1
# Gem specification for schematron-nokogiri: metadata first, then the
# packaged file list, then runtime dependencies.
Gem::Specification.new do |spec|
  spec.name        = 'schematron-nokogiri'
  spec.version     = '0.0.1' # SemVer.find.format '%M.%m.%p'
  spec.summary     = 'ISO Schematron Validation using Nokogiri'
  spec.description = 'Using this gem you can validate an XML document using a ISO Schematron validation file'
  spec.authors     = ['Francesco Lazzarino', 'Alexandru Szasz']
  spec.email       = 'alexxed@gmail.com'
  spec.homepage    = 'https://github.com/alexxed/schematron'
  spec.licenses    = %w[MIT]
  spec.executables = %w[stron-nokogiri]

  # Fixed top-level files, followed by everything matched by each glob
  # (library code, specs and fixtures, bundled ISO stylesheets).
  packaged = %w[schematron-nokogiri.gemspec README.md LICENSE.txt .semver]
  ['lib/*.rb', 'spec/**/*', 'iso-schematron-xslt1/*'].each do |pattern|
    packaged.concat(Dir[pattern])
  end
  spec.files = packaged

  spec.add_dependency 'nokogiri', '~> 1.6'
end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+ require 'schematron-nokogiri'
3
+
4
describe "validate executable" do

  # Schema used by the examples that validate an instance document
  schema = 'spec/schema/fda_sip.sch'

  # Runs the stron-nokogiri executable with the given command line arguments
  # and returns whatever it wrote to standard out.
  def stron(*args)
    `ruby -Ilib bin/stron-nokogiri #{args.join(' ')}`
  end

  it "should take only a schema and an instance document" do
    # Only one argument is given, so the usage text is expected
    stron('theschema').should =~ /Usage: /
  end

  it "should validate a good instance doc" do
    output = stron(schema, 'spec/instances/daitss-sip/Example1.xml')
    output.should be_empty
  end

  it "should print errors to standard out" do
    output = stron(schema, 'spec/instances/daitss-sip/Example2.xml')
    output.should =~ /^element "file" on line 48/
  end

end
@@ -0,0 +1,5 @@
1
+ describe "feature requests" do
2
+   it "should work with http://www.zvon.org/xxl/XMLSchemaTutorial/Output/ser_keys_st0.html examples"
3
+     # if current() is not implemented, then look into implementing
4
+     # extensions to libxslt (see http://xmlsoft.org/XSLT/extensions.html)
5
+ end
@@ -0,0 +1,37 @@
1
+ <mets xmlns="http://www.loc.gov/METS/"
2
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
+ xmlns:xlink="http://www.w3.org/1999/xlink"
4
+ xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd
5
+ http://www.fcla.edu/dls/md/daitss/ http://www.fcla.edu/dls/md/daitss/daitss.xsd">
6
+ <amdSec>
7
+
8
+ <!-- required agreement info -->
9
+ <digiprovMD ID="digiprov-1">
10
+ <mdWrap MDTYPE="OTHER" OTHERMDTYPE="DATISS">
11
+ <xmlData>
12
+ <daitss xmlns="http://www.fcla.edu/dls/md/daitss/">
13
+ <AGREEMENT_INFO ACCOUNT="YOUR_ACCOUNT" PROJECT="THIS_PROJECT"/>
14
+ </daitss>
15
+ </xmlData>
16
+ </mdWrap>
17
+ </digiprovMD>
18
+
19
+ </amdSec>
20
+
21
+ <!-- must have checksum, checksum type & file location -->
22
+ <fileSec>
23
+ <fileGrp>
24
+ <file ID="file-1" xCHECKSUM="5ddb5736a014619bbbb3684bc6ae1613" CHECKSUMTYPE="MD5">
25
+ <FLocat LOCTYPE="URL" xlink:href="content_file"/>
26
+ </file>
27
+ </fileGrp>
28
+ </fileSec>
29
+
30
+ <!-- struct map can be fashioned in any way valid to mets -->
31
+ <structMap>
32
+ <div>
33
+ <fptr FILEID="file-1"/>
34
+ </div>
35
+ </structMap>
36
+
37
+ </mets>
@@ -0,0 +1,68 @@
1
+ <mets xmlns="http://www.loc.gov/METS/"
2
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
+ xmlns:xlink="http://www.w3.org/1999/xlink"
4
+ xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd
5
+ http://www.fcla.edu/dls/md/daitss/ http://www.fcla.edu/dls/md/daitss/daitss.xsd">
6
+ <amdSec>
7
+
8
+ <!-- required agreement info -->
9
+ <digiprovMD ID="digiprov-1">
10
+ <mdWrap MDTYPE="OTHER" OTHERMDTYPE="DATISS">
11
+ <xmlData>
12
+ <daitss xmlns="http://www.fcla.edu/dls/md/daitss/">
13
+ <AGREEMENT_INFO ACCOUNT="YOUR_ACCOUNT" PROJECT="THIS_PROJECT"/>
14
+ </daitss>
15
+ </xmlData>
16
+ </mdWrap>
17
+ </digiprovMD>
18
+
19
+ </amdSec>
20
+
21
+ <!-- must have checksum, checksum type & file location -->
22
+ <fileSec>
23
+ <fileGrp>
24
+ <file ID="file-1" CHECKSUM="259c4fabf48fd8fa664fb7123e470624" CHECKSUMTYPE="MD5">
25
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_1"/>
26
+ </file>
27
+
28
+ <file ID="file-2" CHECKSUM="6913536569c62bb11a4bb453e0553a56" CHECKSUMTYPE="MD5">
29
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_2"/>
30
+ </file>
31
+
32
+ <file ID="file-3" CHECKSUM="5ddb5736a014619bbbb3684bc6ae1613" CHECKSUMTYPE="MD5">
33
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_3"/>
34
+ </file>
35
+
36
+ <file ID="file-4" CHECKSUM="1bd851567835a6b6ce0b78502f1d0abe" CHECKSUMTYPE="MD5">
37
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_4"/>
38
+ </file>
39
+
40
+ <file ID="file-5" CHECKSUM="545976801e805db1c77cb9f4fa55c051" CHECKSUMTYPE="MD5">
41
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_5"/>
42
+ </file>
43
+
44
+ <file ID="file-6">
45
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_6"/>
46
+ </file>
47
+
48
+ <file ID="file-7" CHECKSUM="bfb0cf6095cd5ab3837119b11d3f285500000000z" CHECKSUMTYPE="SHA-1">
49
+ <FLocat LOCTYPE="URL" xlink:href="xxx/content_file_7"/>
50
+ </file>
51
+
52
+ </fileGrp>
53
+ </fileSec>
54
+
55
+ <!-- struct map can be fashioned in any way valid to mets -->
56
+ <structMap>
57
+ <div>
58
+ <fptr FILEID="file-1"/>
59
+ <fptr FILEID="file-2"/>
60
+ <fptr FILEID="file-3"/>
61
+ <fptr FILEID="file-4"/>
62
+ <fptr FILEID="file-5"/>
63
+ <fptr FILEID="file-6"/>
64
+ <fptr FILEID="file-7"/>
65
+ </div>
66
+ </structMap>
67
+
68
+ </mets>
@@ -0,0 +1,102 @@
1
+ <!-- Example showing PREMIS in METS -->
2
+
3
+ <mets xmlns="http://www.loc.gov/METS/"
4
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5
+ xmlns:xlink="http://www.w3.org/1999/xlink"
6
+ xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd
7
+ http://www.fcla.edu/dls/md/daitss/ http://www.fcla.edu/dls/md/daitss/daitss.xsd">
8
+ <amdSec>
9
+
10
+
11
+ <!-- WRONG: digiprovMD/object -->
12
+ <digiprovMD ID="digiprov-1">
13
+ <mdWrap MDTYPE="PREMIS">
14
+ <xmlData>
15
+
16
+ <object xmlns="info:lc/xmlns/premis-v2" xsi:type="file">
17
+ <objectIdentifier>
18
+ <objectIdentifierType>DAITSS2</objectIdentifierType>
19
+ <objectIdentifierValue>daitss://archive.fcla.edu/p926/f0</objectIdentifierValue>
20
+ </objectIdentifier>
21
+ <objectCharacteristics>
22
+ <compositionLevel>0</compositionLevel>
23
+ <fixity>
24
+ <messageDigestAlgorithm>MD5</messageDigestAlgorithm>
25
+ <messageDigest>2de9ef79df730f93e40819625cf7bcb2</messageDigest>
26
+ </fixity>
27
+ <size>3001452</size>
28
+ <format>
29
+ <formatDesignation>
30
+ <formatName>TIFF</formatName>
31
+ <formatVersion>4.0</formatVersion>
32
+ </formatDesignation>
33
+ <formatRegistry>
34
+ <formatRegistryName>http://www.nationalarchives.gov.uk/pronom</formatRegistryName>
35
+ <formatRegistryKey>fmt/8</formatRegistryKey>
36
+ </formatRegistry>
37
+ </format>
38
+ </objectCharacteristics>
39
+ <storage>
40
+ <contentLocation>
41
+ <contentLocationType>URL</contentLocationType>
42
+ <contentLocationValue>http://archive.fcla.edu:5000/silos/gainesville-1/p926</contentLocationValue>
43
+ </contentLocation>
44
+ <storageMedium>Hard disk</storageMedium>
45
+ </storage>
46
+ </object>
47
+
48
+ </xmlData>
49
+ </mdWrap>
50
+ </digiprovMD>
51
+
52
+ <!-- WRONG techMD/event -->
53
+ <techMD ID="tech-1">
54
+ <mdWrap MDTYPE="PREMIS">
55
+ <xmlData>
56
+
57
+ <event xmlns="info:lc/xmlns/premis-v2">
58
+ <eventIdentifier>
59
+ <eventIdentifierType>DAITSS</eventIdentifierType>
60
+ <eventIdentifierValue>1</eventIdentifierValue>
61
+ </eventIdentifier>
62
+ <eventType>Format Validation</eventType>
63
+ <eventDateTime>2008-05-06T10:40:22-04:00</eventDateTime>
64
+ <eventOutcomeInformation>
65
+ <eventOutcome>Invalid</eventOutcome>
66
+ <eventOutcomeDetail>
67
+ <eventOutcomeDetailExtension>
68
+ <anomaly>ill-formed DateTime value</anomaly>
69
+ </eventOutcomeDetailExtension>
70
+ </eventOutcomeDetail>
71
+ </eventOutcomeInformation>
72
+ <linkingAgentIdentifier>
73
+ <linkingAgentIdentifierType>URI</linkingAgentIdentifierType>
74
+ <linkingAgentIdentifierValue>http://archive.fcla.edu/format-description</linkingAgentIdentifierValue>
75
+ </linkingAgentIdentifier>
76
+ <linkingObjectIdentifier>
77
+ <linkingObjectIdentifierType>DAITSS2</linkingObjectIdentifierType>
78
+ <linkingObjectIdentifierValue>daitss://archive.fcla.edu/p926/f0</linkingObjectIdentifierValue>
79
+ </linkingObjectIdentifier>
80
+ </event>
81
+
82
+ </xmlData>
83
+ </mdWrap>
84
+ </techMD>
85
+
86
+ </amdSec>
87
+
88
+ <fileSec>
89
+ <fileGrp>
90
+ <file ID="file-1" CHECKSUM="5ddb5736a014619bbbb3684bc6ae1613" CHECKSUMTYPE="MD5">
91
+ <FLocat LOCTYPE="URL" xlink:href="content_file"/>
92
+ </file>
93
+ </fileGrp>
94
+ </fileSec>
95
+
96
+ <structMap>
97
+ <div>
98
+ <fptr FILEID="file-1"/>
99
+ </div>
100
+ </structMap>
101
+
102
+ </mets>
@@ -0,0 +1,103 @@
1
+ <!-- Example showing PREMIS in METS -->
2
+
3
+ <mets xmlns="http://www.loc.gov/METS/"
4
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5
+ xmlns:xlink="http://www.w3.org/1999/xlink"
6
+ xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd
7
+ http://www.fcla.edu/dls/md/daitss/ http://www.fcla.edu/dls/md/daitss/daitss.xsd">
8
+ <amdSec>
9
+
10
+
11
+ <!-- techMD/object -->
12
+ <techMD ID="tech-1">
13
+ <mdWrap MDTYPE="PREMIS">
14
+ <xmlData>
15
+
16
+ <object xmlns="info:lc/xmlns/premis-v2" xsi:type="file">
17
+ <objectIdentifier>
18
+ <objectIdentifierType>DAITSS2</objectIdentifierType>
19
+ <objectIdentifierValue>daitss://archive.fcla.edu/p926/f0</objectIdentifierValue>
20
+ </objectIdentifier>
21
+ <objectCharacteristics>
22
+ <compositionLevel>0</compositionLevel>
23
+ <fixity>
24
+ <messageDigestAlgorithm>MD5</messageDigestAlgorithm>
25
+ <messageDigest>2de9ef79df730f93e40819625cf7bcb2</messageDigest>
26
+ </fixity>
27
+ <size>3001452</size>
28
+ <format>
29
+ <formatDesignation>
30
+ <formatName>TIFF</formatName>
31
+ <formatVersion>4.0</formatVersion>
32
+ </formatDesignation>
33
+ <formatRegistry>
34
+ <formatRegistryName>http://www.nationalarchives.gov.uk/pronom</formatRegistryName>
35
+ <formatRegistryKey>fmt/8</formatRegistryKey>
36
+ </formatRegistry>
37
+ </format>
38
+ </objectCharacteristics>
39
+ <storage>
40
+ <contentLocation>
41
+ <contentLocationType>URL</contentLocationType>
42
+ <contentLocationValue>http://archive.fcla.edu:5000/silos/gainesville-1/p926</contentLocationValue>
43
+ </contentLocation>
44
+ <storageMedium>Hard disk</storageMedium>
45
+ </storage>
46
+ </object>
47
+
48
+ </xmlData>
49
+ </mdWrap>
50
+ </techMD>
51
+
52
+
53
+ <!-- digiprovMD/event -->
54
+ <digiprovMD ID="digiprov-1">
55
+ <mdWrap MDTYPE="PREMIS">
56
+ <xmlData>
57
+
58
+ <event xmlns="info:lc/xmlns/premis-v2">
59
+ <eventIdentifier>
60
+ <eventIdentifierType>DAITSS</eventIdentifierType>
61
+ <eventIdentifierValue>1</eventIdentifierValue>
62
+ </eventIdentifier>
63
+ <eventType>Format Validation</eventType>
64
+ <eventDateTime>2008-05-06T10:40:22-04:00</eventDateTime>
65
+ <eventOutcomeInformation>
66
+ <eventOutcome>Invalid</eventOutcome>
67
+ <eventOutcomeDetail>
68
+ <eventOutcomeDetailExtension>
69
+ <anomaly>ill-formed DateTime value</anomaly>
70
+ </eventOutcomeDetailExtension>
71
+ </eventOutcomeDetail>
72
+ </eventOutcomeInformation>
73
+ <linkingAgentIdentifier>
74
+ <linkingAgentIdentifierType>URI</linkingAgentIdentifierType>
75
+ <linkingAgentIdentifierValue>http://archive.fcla.edu/format-description</linkingAgentIdentifierValue>
76
+ </linkingAgentIdentifier>
77
+ <linkingObjectIdentifier>
78
+ <linkingObjectIdentifierType>DAITSS2</linkingObjectIdentifierType>
79
+ <linkingObjectIdentifierValue>daitss://archive.fcla.edu/p926/f0</linkingObjectIdentifierValue>
80
+ </linkingObjectIdentifier>
81
+ </event>
82
+
83
+ </xmlData>
84
+ </mdWrap>
85
+ </digiprovMD>
86
+
87
+ </amdSec>
88
+
89
+ <fileSec>
90
+ <fileGrp>
91
+ <file ID="file-1" CHECKSUM="5ddb5736a014619bbbb3684bc6ae1613" CHECKSUMTYPE="MD5">
92
+ <FLocat LOCTYPE="URL" xlink:href="content_file"/>
93
+ </file>
94
+ </fileGrp>
95
+ </fileSec>
96
+
97
+ <structMap>
98
+ <div>
99
+ <fptr FILEID="file-1"/>
100
+ </div>
101
+ </structMap>
102
+
103
+ </mets>