saxon-rb 0.4.0-java → 0.7.2-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +429 -42
  3. data/.ruby-version +1 -1
  4. data/.yardopts +1 -0
  5. data/Gemfile +2 -2
  6. data/README.md +358 -10
  7. data/Rakefile +237 -7
  8. data/docs/templates/plugin.rb +73 -0
  9. data/lib/net/sf/saxon/Saxon-HE/{9.9.1-5/Saxon-HE-9.9.1-5.jar → 9.9.1-6/Saxon-HE-9.9.1-6.jar} +0 -0
  10. data/lib/saxon-rb.rb +0 -0
  11. data/lib/{saxon_jars.rb → saxon-rb_jars.rb} +2 -2
  12. data/lib/saxon.rb +13 -0
  13. data/lib/saxon/axis_iterator.rb +8 -1
  14. data/lib/saxon/configuration.rb +16 -13
  15. data/lib/saxon/document_builder.rb +216 -5
  16. data/lib/saxon/feature_flags.rb +11 -0
  17. data/lib/saxon/feature_flags/errors.rb +8 -0
  18. data/lib/saxon/feature_flags/helpers.rb +15 -0
  19. data/lib/saxon/feature_flags/version.rb +100 -0
  20. data/lib/saxon/item_type.rb +129 -89
  21. data/lib/saxon/item_type/lexical_string_conversion.rb +214 -59
  22. data/lib/saxon/item_type/value_to_ruby.rb +25 -0
  23. data/lib/saxon/loader.rb +6 -1
  24. data/lib/saxon/nokogiri.rb +78 -0
  25. data/lib/saxon/occurrence_indicator.rb +32 -3
  26. data/lib/saxon/processor.rb +50 -5
  27. data/lib/saxon/qname.rb +37 -2
  28. data/lib/saxon/s9api.rb +5 -0
  29. data/lib/saxon/sequence_type.rb +131 -0
  30. data/lib/saxon/serializer.rb +3 -137
  31. data/lib/saxon/serializer/destination.rb +80 -0
  32. data/lib/saxon/serializer/object.rb +93 -0
  33. data/lib/saxon/serializer/output_properties.rb +83 -0
  34. data/lib/saxon/source.rb +207 -71
  35. data/lib/saxon/version.rb +7 -1
  36. data/lib/saxon/version/library.rb +89 -0
  37. data/lib/saxon/xdm.rb +7 -0
  38. data/lib/saxon/xdm/array.rb +16 -0
  39. data/lib/saxon/xdm/atomic_value.rb +10 -2
  40. data/lib/saxon/xdm/empty_sequence.rb +13 -0
  41. data/lib/saxon/xdm/external_object.rb +1 -0
  42. data/lib/saxon/xdm/function_item.rb +1 -0
  43. data/lib/saxon/xdm/item.rb +7 -0
  44. data/lib/saxon/xdm/map.rb +38 -0
  45. data/lib/saxon/xdm/node.rb +50 -1
  46. data/lib/saxon/xdm/sequence_like.rb +15 -0
  47. data/lib/saxon/xdm/value.rb +21 -5
  48. data/lib/saxon/xpath.rb +9 -0
  49. data/lib/saxon/xpath/compiler.rb +37 -2
  50. data/lib/saxon/xpath/executable.rb +53 -28
  51. data/lib/saxon/xpath/static_context.rb +25 -40
  52. data/lib/saxon/xpath/variable_declaration.rb +16 -49
  53. data/lib/saxon/xslt.rb +12 -0
  54. data/lib/saxon/xslt/compiler.rb +75 -6
  55. data/lib/saxon/xslt/evaluation_context.rb +30 -4
  56. data/lib/saxon/xslt/executable.rb +206 -29
  57. data/lib/saxon/xslt/invocation.rb +97 -0
  58. data/saxon-rb.gemspec +3 -3
  59. metadata +22 -10
  60. data/saxon.gemspec +0 -30
@@ -0,0 +1,73 @@
1
+ include YARD
2
+ include Templates
3
+
4
+ module JavadocHtmlHelper
5
+ JAVA_TYPE_MATCHER = /\A(?:[a-z_$](?:[a-z0-9_$]*)\.)+[A-Z][A-Za-z_$]*/
6
+ RUBY_COLLECTION_TYPE_MATCHER = /\A(?:[A-Z][A-Za-z0-9_])(?:::[A-Z][A-Za-z0-9_]*)*</
7
+ SAXON_TYPE_MATCHER = /\A(?:net\.sf\.saxon|com\.saxonica)/
8
+
9
+ def format_types(typelist, brackets = true)
10
+ return unless typelist.is_a?(Array)
11
+ list = typelist.map { |type|
12
+ case type
13
+ when JAVA_TYPE_MATCHER
14
+ format_java_type(type)
15
+ else
16
+ super([type], false)
17
+ end
18
+ }
19
+ list.empty? ? "" : (brackets ? "(#{list.join(", ")})" : list.join(", "))
20
+ end
21
+
22
+ def format_java_type(type)
23
+ "<tt>" + linkify_saxon_type(type) + "</tt>"
24
+ end
25
+
26
+ def linkify_saxon_type(type)
27
+ case type
28
+ when SAXON_TYPE_MATCHER
29
+ link = url_for_java_object(type)
30
+ else
31
+ link = nil
32
+ end
33
+ link ? link_url(link, type, :title => h(type)) : type
34
+ end
35
+
36
+ def linkify(*args)
37
+ if args.first.is_a?(String)
38
+ case args.first
39
+ when JAVA_TYPE_MATCHER
40
+ link = url_for_java_object(args.first)
41
+ title = args.first
42
+ link ? link_url(link, title, :title => h(title)) : title
43
+ else
44
+ super
45
+ end
46
+ else
47
+ super
48
+ end
49
+ end
50
+
51
+ def url_for(obj, anchor = nil, relative = true)
52
+ case obj
53
+ when JAVA_TYPE_MATCHER
54
+ url_for_java_object(obj, anchor, relative)
55
+ else
56
+ super
57
+ end
58
+ end
59
+
60
+ def url_for_java_object(obj, anchor = nil, relative = nil)
61
+ case obj
62
+ when SAXON_TYPE_MATCHER
63
+ package, _, klass = obj.rpartition(".")
64
+ "http://saxonica.com/documentation/index.html#!javadoc/#{package}/#{klass}"
65
+ else
66
+ path = obj.split(".").join("/")
67
+ "https://docs.oracle.com/javase/8/docs/api/index.html?#{path}.html"
68
+ end
69
+ end
70
+ end
71
+
72
+ Template.extra_includes << proc { |opts| JavadocHtmlHelper if opts.format == :html }
73
+ # Engine.register_template_path(File.dirname(__FILE__))
File without changes
@@ -2,9 +2,9 @@
2
2
  begin
3
3
  require 'jar_dependencies'
4
4
  rescue LoadError
5
- require 'net/sf/saxon/Saxon-HE/9.9.1-5/Saxon-HE-9.9.1-5.jar'
5
+ require 'net/sf/saxon/Saxon-HE/9.9.1-6/Saxon-HE-9.9.1-6.jar'
6
6
  end
7
7
 
8
8
  if defined? Jars
9
- require_jar 'net.sf.saxon', 'Saxon-HE', '9.9.1-5'
9
+ require_jar 'net.sf.saxon', 'Saxon-HE', '9.9.1-6'
10
10
  end
@@ -3,4 +3,17 @@ require_relative 'saxon/processor'
3
3
 
4
4
  # An idiomatic Ruby wrapper around the Saxon XML processing library.
5
5
  module Saxon
6
+ # Parse an XML document using a vanilla {DocumentBuilder} from the default
7
+ # {Processor}
8
+ # @see Saxon::Processor#XML
9
+ def self.XML(input, opts = {})
10
+ Processor.default.XML(input, opts)
11
+ end
12
+
13
+ # Compile an XSLT Stylesheet using a new {XSLT::Compiler} from the default
14
+ # {Processor}
15
+ # @see Saxon::Processor#XSLT
16
+ def self.XSLT(input, opts = {}, &block)
17
+ Processor.default.XSLT(input, opts, &block)
18
+ end
6
19
  end
@@ -1,7 +1,12 @@
1
1
  require_relative 's9api'
2
2
 
3
3
  module Saxon
4
- # An XPath Data Model Node object, representing an XML document, or an element or one of the other node chunks in the XDM.
4
+ # An iterator across an XPath axis of an XDM document, e.g. down to children
5
+ # (+child+), up to the root (+ancestor+)
6
+ # @example iterate over child nodes
7
+ # AxisIterator.new(node, :child).each do |child_node|
8
+ # puts child_node.node_name
9
+ # end
5
10
  class AxisIterator
6
11
  include Enumerable
7
12
 
@@ -18,6 +23,8 @@ module Saxon
18
23
  s9_sequence_iterator
19
24
  end
20
25
 
26
+ # yields each node in the sequence
27
+ # @yieldparam [Saxon::XDM::Node] the next node in the sequence
21
28
  def each(&block)
22
29
  s9_sequence_iterator.lazy.map { |s9_xdm_node| Saxon::XDM::Node.new(s9_xdm_node) }.each(&block)
23
30
  end
@@ -2,14 +2,16 @@ require 'saxon/s9api'
2
2
  require 'saxon/parse_options'
3
3
 
4
4
  module Saxon
5
- # Wraps the <tt>net.saxon.Configuration</tt> class. See
6
- # http://saxonica.com/documentation9.5/javadoc/net/sf/saxon/Configuration.html
7
- # for details of what configuration options are available and what values
8
- # they accept. See
9
- # http://saxonica.com/documentation9.5/javadoc/net/sf/saxon/lib/FeatureKeys.html
10
- # for details of the constant names used to access the values
5
+ # Wraps the <tt>net.sf.saxon.Configuration</tt> class.
6
+ #
7
+ # See {net.sf.saxon.Configuration} for details of what configuration options
8
+ # are available and what values they accept.
9
+ #
10
+ # See {net.sf.saxon.lib.FeatureKeys} for details of the constant names used to
11
+ # access the values
11
12
  class Configuration
12
13
  DEFAULT_SEMAPHORE = Mutex.new
14
+ private_constant :DEFAULT_SEMAPHORE
13
15
 
14
16
  # Provides a processor with default configuration. Essentially a singleton
15
17
  # instance
@@ -59,22 +61,23 @@ module Saxon
59
61
  end
60
62
 
61
63
  # Get a configuration option value
62
- # See https://www.saxonica.com/html/documentation/javadoc/net/sf/saxon/lib/FeatureKeys.html
63
- # for details of the available options. Use the constant name as a string
64
- # or symbol as the option
64
+ #
65
+ # See {net.sf.saxon.lib.FeatureKeys} for details of the available options.
66
+ # Use the constant name as a string or symbol as the option, e.g.
67
+ # +:allow_multhreading+, +'ALLOW_MULTITHREADING'+, +'allow_multithreading'+.
65
68
  #
66
69
  # @param option [String, Symbol]
67
70
  # @return [Object] the value of the configuration option
68
71
  # @raise [NameError] if the option name does not exist
72
+ # @see net.sf.saxon.lib.FeatureKeys
69
73
  def [](option)
70
74
  @config.getConfigurationProperty(option_url(option))
71
75
  end
72
76
 
73
- # Get a configuration option value
74
- # See http://saxonica.com/documentation9.5/javadoc/net/sf/saxon/lib/FeatureKeys.html
75
- # for details of the available options. Use the constant name as a string
76
- # or symbol as the option
77
+ # Set a configuration option value. See {#[]} for details about the option
78
+ # names.
77
79
  #
80
+ # @see #[]
78
81
  # @param option [String, Symbol]
79
82
  # @param value [Object] the value of the configuration option
80
83
  # @return [Object] the value you passed in
@@ -4,11 +4,217 @@ module Saxon
4
4
  # Builds XDM objects from XML sources, for use in XSLT or for query and
5
5
  # access
6
6
  class DocumentBuilder
7
+ # Provides a simple configuraion DSL for DocumentBuilders.
8
+ # @see DocumentBuilder.create
9
+ class ConfigurationDSL
10
+ # @api private
11
+ #
12
+ # Create a new instance and +instance_exec+ the passed-in block against it
13
+ def self.define(document_builder, block)
14
+ new(document_builder).instance_exec(&block)
15
+ end
16
+
17
+ # @api private
18
+ def initialize(document_builder)
19
+ @document_builder = document_builder
20
+ end
21
+
22
+ # Sets line numbering on or off
23
+ #
24
+ # @see DocumentBuilder#line_numbering=
25
+ #
26
+ # @param value [Boolean] on (true) or off (false)
27
+ def line_numbering(value)
28
+ @document_builder.line_numbering = value
29
+ end
30
+
31
+ # Sets the base URI of documents created using this instance.
32
+ #
33
+ # @see DocumentBuilder.base_uri=
34
+ #
35
+ # @param value [String, URI::File, URI::HTTP] The (absolute) base URI to use
36
+ def base_uri(value)
37
+ @document_builder.base_uri = value
38
+ end
39
+
40
+ # Sets the base URI of documents created using this instance.
41
+ #
42
+ # @see DocumentBuilder.base_uri=
43
+ #
44
+ # @param value [String, URI::File, URI::HTTP] The (absolute) base URI to use
45
+ def whitespace_stripping_policy(value)
46
+ @document_builder.whitespace_stripping_policy = value
47
+ end
48
+
49
+ # Sets the base URI of documents created using this instance.
50
+ #
51
+ # @see DocumentBuilder.base_uri=
52
+ #
53
+ # @param value [String, URI::File, URI::HTTP] The (absolute) base URI to use
54
+ def dtd_validation(value)
55
+ @document_builder.dtd_validation = value
56
+ end
57
+ end
58
+
59
+
60
+ # Create a new DocumentBuilder that can be used to build new XML documents
61
+ # with the passed-in {Saxon::Processor}. If a block is passed in it's
62
+ # executed as a DSL for configuring the builder instance.
63
+ #
64
+ # @param processor [Saxon::Processor] the Processor
65
+ # @yield An DocumentBuilder configuration DSL block
66
+ # @return [Saxon::DocumentBuilder] the new instance
67
+ def self.create(processor, &block)
68
+ new(processor.to_java.newDocumentBuilder, &block)
69
+ end
70
+
71
+ attr_reader :s9_document_builder
72
+ private :s9_document_builder
73
+
7
74
  # @api private
8
75
  # @param [net.sf.saxon.s9api.DocumentBuilder] s9_document_builder The
9
- # Saxon DocumentBuilder instance to wrap
10
- def initialize(s9_document_builder)
76
+ # Saxon DocumentBuilder instance to wrap
77
+ def initialize(s9_document_builder, &block)
11
78
  @s9_document_builder = s9_document_builder
79
+ if block_given?
80
+ ConfigurationDSL.define(self, block)
81
+ end
82
+ end
83
+
84
+ # Report whether documents created using this instance will keep track of
85
+ # the line and column numbers of elements.
86
+ #
87
+ # @return [Boolean] whether line numbering will be tracked
88
+ def line_numbering?
89
+ s9_document_builder.isLineNumbering
90
+ end
91
+
92
+
93
+ # Switch tracking of line and column numbers for elements in documents
94
+ # created by this instance on or off
95
+ #
96
+ # @see https://www.saxonica.com/documentation9.9/index.html#!javadoc/net.sf.saxon.s9api/DocumentBuilder@setLineNumbering
97
+ #
98
+ # @param on_or_not [Boolean] whether or not to track line numbering
99
+ def line_numbering=(on_or_not)
100
+ s9_document_builder.setLineNumbering(on_or_not)
101
+ end
102
+
103
+ # Return the default base URI to be used when building documents using this
104
+ # instance. This value will be ignored if the source being parsed has an
105
+ # intrinsic base URI (e.g. a File).
106
+ #
107
+ # Returns +nil+ if no URI is set (the default).
108
+ #
109
+ # @return [nil, URI::File, URI::HTTP] the default base URI (or nil)
110
+ def base_uri
111
+ uri = s9_document_builder.getBaseURI
112
+ uri.nil? ? uri : URI(uri.to_s)
113
+ end
114
+
115
+ # Set the base URI of documents created using this instance. This value will
116
+ # be ignored if the source being parsed has an intrinsic base URI (e.g. a
117
+ # File)
118
+ #
119
+ # @see https://www.saxonica.com/documentation9.9/index.html#!javadoc/net.sf.saxon.s9api/DocumentBuilder@setBaseURI
120
+ #
121
+ # @param uri [String, URI::File, URI::HTTP] The (absolute) base URI to use
122
+ def base_uri=(uri)
123
+ s9_document_builder.setBaseURI(java.net.URI.new(uri.to_s))
124
+ end
125
+
126
+ # Return the Whitespace stripping policy for this instance. Returns one of
127
+ # the standard policy names as a symbol, or the custom Java
128
+ # WhitespaceStrippingPolicy if one was defined using
129
+ # +#whitespace_stripping_policy = ->(qname) { ... }+. (See
130
+ # {#whitespace_stripping_policy=} for more.)
131
+ #
132
+ # +:all+: All whitespace-only nodes will be discarded
133
+ #
134
+ # +:none+: No whitespace-only nodes will be discarded (the default if DTD or
135
+ # schema validation is not in effect)
136
+ #
137
+ # +:ignorable+: Whitespace-only nodes inside elements defined as
138
+ # element-only in the DTD or schema being used will be discarded (the
139
+ # default if DTD or schema validation is in effect)
140
+ #
141
+ # +:unspecified+: the default, which in practice means :ignorable if DTD or
142
+ # schema validation is in effect, and :none otherwise.
143
+ #
144
+ # @return [:all, :none, :ignorable, :unspecified, Proc]
145
+ def whitespace_stripping_policy
146
+ s9_policy = s9_document_builder.getWhitespaceStrippingPolicy
147
+ case s9_policy
148
+ when Saxon::S9API::WhitespaceStrippingPolicy::UNSPECIFIED
149
+ :unspecified
150
+ when Saxon::S9API::WhitespaceStrippingPolicy::NONE
151
+ :none
152
+ when Saxon::S9API::WhitespaceStrippingPolicy::IGNORABLE
153
+ :ignorable
154
+ when Saxon::S9API::WhitespaceStrippingPolicy::ALL
155
+ :all
156
+ else
157
+ s9_policy
158
+ end
159
+ end
160
+
161
+ # Set the whitespace stripping policy to be used for documents built with
162
+ # this instance.
163
+ #
164
+ # Possible values are:
165
+ #
166
+ # * One of the standard policies, as a symbol (+:all+, +:none+,
167
+ # +:ignorable+, +:unspecified+, see {#whitespace_stripping_policy}).
168
+ # * A Java +net.sf.saxon.s9api.WhitesapceStrippingPolicy+ instance
169
+ # * A Proc/lambda that is handed an element name as a {Saxon::QName}, and
170
+ # should return true (if whitespace should be stripped for this element)
171
+ # or false (it should not).
172
+ # @example
173
+ # whitespace_stripping_policy = ->(element_qname) {
174
+ # element_qname == Saxon::QName.clark("{http://example.org/}element-name")
175
+ # }
176
+ #
177
+ # @see https://www.saxonica.com/documentation9.9/index.html#!javadoc/net.sf.saxon.s9api/DocumentBuilder@setWhitespaceStrippingPolicy
178
+ # @see https://www.saxonica.com/documentation9.9/index.html#!javadoc/net.sf.saxon.s9api/WhitespaceStrippingPolicy
179
+ # @param policy [Symbol, Proc, Saxon::S9API::WhitespaceStrippingPolicy] the
180
+ # policy to use
181
+ def whitespace_stripping_policy=(policy)
182
+ case policy
183
+ when :unspecified, :none, :ignorable, :all
184
+ s9_policy = Saxon::S9API::WhitespaceStrippingPolicy.const_get(policy.to_s.upcase.to_sym)
185
+ when Proc
186
+ wrapped_policy = ->(s9_qname) {
187
+ policy.call(Saxon::QName.new(s9_qname))
188
+ }
189
+ s9_policy = Saxon::S9API::WhitespaceStrippingPolicy.makeCustomPolicy(wrapped_policy)
190
+ when Saxon::S9API::WhitespaceStrippingPolicy
191
+ s9_policy = policy
192
+ else
193
+ raise InvalidWhitespaceStrippingPolicyError, "#{policy.inspect} is not one of the allowed Symbols, or a custom policy"
194
+ end
195
+ s9_document_builder.setWhitespaceStrippingPolicy(s9_policy)
196
+ end
197
+
198
+ # @return [Boolean] whether DTD Validation is enabled
199
+ def dtd_validation?
200
+ s9_document_builder.isDTDValidation
201
+ end
202
+
203
+ # Switches DTD validation on or off.
204
+ #
205
+ # It's important to note that DTD validation only applies to documents that
206
+ # contain a +<!doctype>+, but switching DTD validation off doesn't stop the
207
+ # XML parser Saxon uses from trying to retrieve the DTD that's referenced,
208
+ # which can mean network requests. By default, the SAX parser Saxon uses
209
+ # (Xerces) doesn't make use of XML catalogs, which causes problems when documents reference a DTD with a relative path as in:
210
+ # <!DOCTYPE root-element SYSTEM "example.dtd">
211
+ # This can be controlled through a configuration option, however.
212
+ #
213
+ # @see https://www.saxonica.com/documentation9.9/index.html#!javadoc/net.sf.saxon.s9api/DocumentBuilder@setDTDValidation
214
+ # @see https://www.saxonica.com/documentation9.9/index.html#!sourcedocs/controlling-parsing
215
+ # @param on [Boolean] whether DTD Validation should be enabled
216
+ def dtd_validation=(on)
217
+ s9_document_builder.setDTDValidation(on)
12
218
  end
13
219
 
14
220
  # @param [Saxon::Source] source The Saxon::Source containing the source
@@ -16,13 +222,18 @@ module Saxon
16
222
  # @return [Saxon::XDM::Node] The Saxon::XDM::Node representing the root of the
17
223
  # document tree
18
224
  def build(source)
19
- XDM::Node.new(@s9_document_builder.build(source.to_java))
225
+ XDM::Node.new(s9_document_builder.build(source.to_java))
20
226
  end
21
227
 
22
- # @return [net.sf.saxon.s9api.DocumentBuilder] The underlying Java Saxon
228
+ # @return [Java::NetSfSaxonS9api::DocumentBuilder] The underlying Java Saxon
23
229
  # DocumentBuilder instance
24
230
  def to_java
25
- @s9_document_builder
231
+ s9_document_builder
26
232
  end
27
233
  end
234
+
235
+ # Error raised when someone tries to set an invalid whitespace stripping
236
+ # policy on a {DocumentBuilder}
237
+ class InvalidWhitespaceStrippingPolicyError < RuntimeError
238
+ end
28
239
  end
@@ -0,0 +1,11 @@
1
+ require_relative 'feature_flags/version'
2
+ require_relative 'feature_flags/helpers'
3
+
4
+ module Saxon
5
+ # Allows saxon-rb features to be switched off if they can't be used under one
6
+ # of the otherwise-supported Saxon versions
7
+ #
8
+ # @api private
9
+ module FeatureFlags
10
+ end
11
+ end
@@ -0,0 +1,8 @@
1
+ module Saxon
2
+ module FeatureFlags
3
+ # Error raised if a feature is not available under the loaded version of
4
+ # Saxon
5
+ class UnavailableInThisSaxonVersionError < StandardError
6
+ end
7
+ end
8
+ end