saxon-rb 0.4.0-java → 0.5.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +429 -42
  3. data/Gemfile +2 -2
  4. data/README.md +317 -10
  5. data/Rakefile +237 -7
  6. data/lib/net/sf/saxon/Saxon-HE/{9.9.1-5/Saxon-HE-9.9.1-5.jar → 9.9.1-6/Saxon-HE-9.9.1-6.jar} +0 -0
  7. data/lib/saxon-rb.rb +1 -0
  8. data/lib/{saxon_jars.rb → saxon-rb_jars.rb} +2 -2
  9. data/lib/saxon.rb +13 -0
  10. data/lib/saxon/axis_iterator.rb +8 -1
  11. data/lib/saxon/configuration.rb +1 -0
  12. data/lib/saxon/item_type.rb +12 -17
  13. data/lib/saxon/item_type/lexical_string_conversion.rb +136 -58
  14. data/lib/saxon/item_type/value_to_ruby.rb +13 -0
  15. data/lib/saxon/loader.rb +4 -1
  16. data/lib/saxon/nokogiri.rb +78 -0
  17. data/lib/saxon/occurrence_indicator.rb +32 -3
  18. data/lib/saxon/processor.rb +32 -1
  19. data/lib/saxon/qname.rb +37 -2
  20. data/lib/saxon/s9api.rb +5 -0
  21. data/lib/saxon/sequence_type.rb +131 -0
  22. data/lib/saxon/source.rb +207 -71
  23. data/lib/saxon/version.rb +1 -1
  24. data/lib/saxon/xdm.rb +7 -0
  25. data/lib/saxon/xdm/array.rb +16 -0
  26. data/lib/saxon/xdm/atomic_value.rb +7 -1
  27. data/lib/saxon/xdm/empty_sequence.rb +13 -0
  28. data/lib/saxon/xdm/external_object.rb +1 -0
  29. data/lib/saxon/xdm/function_item.rb +1 -0
  30. data/lib/saxon/xdm/item.rb +7 -0
  31. data/lib/saxon/xdm/map.rb +38 -0
  32. data/lib/saxon/xdm/node.rb +19 -1
  33. data/lib/saxon/xdm/sequence_like.rb +15 -0
  34. data/lib/saxon/xdm/value.rb +21 -5
  35. data/lib/saxon/xpath.rb +9 -0
  36. data/lib/saxon/xpath/compiler.rb +36 -1
  37. data/lib/saxon/xpath/executable.rb +53 -28
  38. data/lib/saxon/xpath/static_context.rb +19 -39
  39. data/lib/saxon/xpath/variable_declaration.rb +16 -49
  40. data/lib/saxon/xslt.rb +12 -0
  41. data/lib/saxon/xslt/compiler.rb +75 -6
  42. data/lib/saxon/xslt/evaluation_context.rb +19 -3
  43. data/lib/saxon/xslt/executable.rb +204 -14
  44. data/saxon-rb.gemspec +1 -1
  45. metadata +9 -7
  46. data/saxon.gemspec +0 -30
data/lib/saxon/source.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'java'
2
2
  require 'saxon/jaxp'
3
3
  require 'uri'
4
+ require 'open-uri'
4
5
  require 'pathname'
5
6
 
6
7
  module Saxon
@@ -8,10 +9,13 @@ module Saxon
8
9
  # the XML bytestream in. Provides some extra methods to make handling closing
9
10
  # the source and its inputstream after consumption more idiomatic
10
11
  class Source
12
+ # Helper methods for getting Java-useful representations of source document
13
+ # strings and files
11
14
  module Helpers
12
15
  # Given a File, or IO object which will return either #path or
13
16
  # #base_uri, return the #base_uri, if present, or the #path, if present, or
14
17
  # nil
18
+ #
15
19
  # @param [File, IO] io A File or IO
16
20
  # object representing the input XML file or data, or a String containing
17
21
  # the XML
@@ -23,30 +27,92 @@ module Saxon
23
27
  io.path if io.respond_to?(:path)
24
28
  end
25
29
 
26
- # Given a File or IO return a Java InputStream
27
- # @param [File, IO, org.jruby.util.IOInputStream, java.io.InputStream]
28
- # io input to be converted to an input stream
30
+ # Given a File or IO return a Java InputStream, or an InputStreamReader if
31
+ # the Encoding is explicitly specified (rather than inferred from the
32
+ # <?xml encoding="..."?> declaration in the source).
33
+ #
34
+ # @param io [File, IO, org.jruby.util.IOInputStream, java.io.InputStream]
35
+ # input to be converted to an input stream
36
+ # @param encoding [Encoding, String] the character encoding to be used
37
+ # for the stream, overriding the XML parser.
29
38
  # @return [java.io.InputStream] the wrapped input
30
- def self.inputstream(io)
31
- case io
39
+ def self.inputstream(io, encoding = nil)
40
+ stream = case io
32
41
  when org.jruby.util.IOInputStream, java.io.InputStream
33
42
  io
34
43
  else
35
44
  io.to_inputstream if io.respond_to?(:read)
36
45
  end
46
+
47
+ return stream if encoding.nil?
48
+ java.io.InputStreamReader.new(stream, ruby_encoding_to_charset(encoding))
37
49
  end
38
50
 
39
51
  # Given a path return a Java File object
40
- # @param [String, Pathname] path the path to the file
52
+ #
53
+ # @param path [String, Pathname] the path to the file
41
54
  # @return [java.io.File] the Java File object
42
55
  def self.file(path)
43
56
  java.io.File.new(path.to_s)
44
57
  end
58
+
59
+ # Given a file path and encoding, return a Java InputStreamReader object
60
+ # for the file.
61
+ #
62
+ # @param path [String, Pathname] the path to the file
63
+ # @param encoding [String, Encoding] the file's character encoding
64
+ # @return [java.io.InputStreamReader] a Java InputStreamReader object
65
+ # wrapping a FileInputStream for the file
66
+ def self.file_reader(path, encoding)
67
+ java.io.InputStreamReader.new(java.io.FileInputStream.new(file(path)), ruby_encoding_to_charset(encoding))
68
+ end
69
+
70
+ # Return a File or Reader object for a file, depending on whether the
71
+ # encoding must be explicitly specified or not.
72
+ #
73
+ # @param path [String, Pathname] the path to the file
74
+ # @param encoding [String, Encoding] the file's character encoding
75
+ # @return [java.io.File, java.io.Reader] a Java File, or a Reader if an encoding is given
76
+ def self.file_or_reader(path, encoding = nil)
77
+ encoding.nil? ? file(path) : file_reader(path, encoding)
78
+ end
79
+
80
+ # Return a Reader object for the String with an explicitly set encoding.
81
+ # If the encoding is +ASCII_8BIT+ then a raw binary InputStream is
82
+ # returned, rather than a character Reader
83
+ #
84
+ # @param string [String] the string
85
+ # @param encoding [String, Encoding] the string's character encoding
86
+ # @return [java.io.InputStream, java.io.Reader] a Java InputStream or Reader object
87
+ def self.string_reader(string, encoding)
88
+ inputstream = StringIO.new(string).to_inputstream
89
+ encoding = ruby_encoding(encoding)
90
+ return inputstream if encoding == ::Encoding::ASCII_8BIT
91
+ java.io.InputStreamReader.new(inputstream, ruby_encoding_to_charset(encoding))
92
+ end
93
+
94
+ # Figure out the equivalent Java +Charset+ for a Ruby {Encoding}.
95
+ #
96
+ # @param encoding [String, Encoding] the encoding to find a +Charset+ for
97
+ def self.ruby_encoding_to_charset(encoding)
98
+ ruby_encoding(encoding).to_java.getEncoding.getCharset
99
+ end
100
+
101
+ # Given a String with an {Encoding} name or an {Encoding} instance, return
102
+ # an {Encoding} instance
103
+ #
104
+ # @param encoding [String, Encoding] the encoding or encoding name
105
+ # @return [Encoding] the encoding
106
+ def self.ruby_encoding(encoding)
107
+ encoding.nil? ? nil : ::Encoding.find(encoding)
108
+ end
45
109
  end
46
110
 
111
+ # Lambda that checks if the given path exists and is a file
47
112
  PathChecker = ->(path) {
48
113
  File.file?(path)
49
114
  }
115
+ # Lambda that checks if the given string is a valid URI
50
116
  URIChecker = ->(uri) {
51
117
  begin
52
118
  URI.parse(uri)
@@ -56,75 +122,144 @@ module Saxon
56
122
  end
57
123
  }
58
124
 
59
- # Generate a Saxon::Source given an IO-like
60
- #
61
- # @param [IO, File] io The IO-like containing XML to be parsed
62
- # @param [Hash] opts
63
- # @option opts [String] :base_uri The Base URI for the Source - an
64
- # absolute URI or relative path that will be used to resolve relative
65
- # URLs in the XML. Setting this will override any path or URI derived
66
- # from the IO-like.
67
- # @return [Saxon::Source] the Saxon::Source wrapping the input
68
- def self.from_io(io, opts = {})
69
- base_uri = opts.fetch(:base_uri) { Helpers.base_uri(io) }
70
- inputstream = Helpers.inputstream(io)
71
- stream_source = Saxon::JAXP::StreamSource.new(inputstream, base_uri)
72
- new(stream_source, inputstream)
73
- end
125
+ class << self
126
+ # Generate a Saxon::Source given an IO-like
127
+ #
128
+ # @param [IO, File] io The IO-like containing XML to be parsed
129
+ # @param [Hash] opts
130
+ # @option opts [String] :base_uri The Base URI for the Source - an
131
+ # absolute URI or relative path that will be used to resolve relative
132
+ # URLs in the XML. Setting this will override any path or URI derived
133
+ # from the IO-like.
134
+ # @option opts [String, Encoding] :encoding The encoding of the source.
135
+ # Note that specifying this will force the parser to ignore the charset
136
+ # if it's set in the XML declaration of the source. Only really useful
137
+ # if there's a discrepancy between the source's declared and actual
138
+ # encoding. Defaults to the <?xml encoding="..."?> declaration in the
139
+ # source.
140
+ # @return [Saxon::Source] the Saxon::Source wrapping the input
141
+ def from_io(io, opts = {})
142
+ base_uri = opts.fetch(:base_uri) { Helpers.base_uri(io) }
143
+ encoding = opts.fetch(:encoding, nil)
144
+ inputstream = Helpers.inputstream(io, encoding)
145
+ from_inputstream_or_reader(inputstream, base_uri)
146
+ end
74
147
 
75
- # Generate a Saxon::Source given a path to a file
76
- #
77
- # @param [String, Pathname] path The path to the XML file to be parsed
78
- # @param [Hash] opts
79
- # @option opts [String] :base_uri The Base URI for the Source - an
80
- # absolute URI or relative path that will be used to resolve relative
81
- # URLs in the XML. Setting this will override the file path.
82
- # @return [Saxon::Source] the Saxon::Source wrapping the input
83
- def self.from_path(path, opts = {})
84
- stream_source = Saxon::JAXP::StreamSource.new(Helpers.file(path))
85
- stream_source.setSystemId(opts[:base_uri]) if opts[:base_uri]
86
- new(stream_source)
87
- end
148
+ # Generate a Saxon::Source given a path to a file
149
+ #
150
+ # @param [String, Pathname] path The path to the XML file to be parsed
151
+ # @param [Hash] opts
152
+ # @option opts [String] :base_uri The Base URI for the Source - an
153
+ # absolute URI or relative path that will be used to resolve relative
154
+ # URLs in the XML. Setting this will override the file path.
155
+ # @option opts [String, Encoding] :encoding The encoding of the source.
156
+ # Note that specifying this will force the parser to ignore the charset
157
+ # if it's set in the XML declaration of the source. Only really useful
158
+ # if there's a discrepancy between the source's declared and actual
159
+ # encoding. Defaults to the <?xml encoding="..."?> declaration in the
160
+ # source.
161
+ # @return [Saxon::Source] the Saxon::Source wrapping the input
162
+ def from_path(path, opts = {})
163
+ encoding = opts.fetch(:encoding, nil)
164
+ return from_inputstream_or_reader(Helpers.file(path), opts[:base_uri]) if encoding.nil?
165
+ reader = Helpers.file_reader(path, encoding)
166
+ base_uri = opts.fetch(:base_uri) { File.expand_path(path) }
167
+ from_inputstream_or_reader(reader, base_uri)
168
+ end
88
169
 
89
- # Generate a Saxon::Source given a URI
90
- #
91
- # @param [String, URI] uri The URI to the XML file to be parsed
92
- # @param [Hash] opts
93
- # @option opts [String] :base_uri The Base URI for the Source - an
94
- # absolute URI or relative path that will be used to resolve relative
95
- # URLs in the XML. Setting this will override the given URI.
96
- # @return [Saxon::Source] the Saxon::Source wrapping the input
97
- def self.from_uri(uri, opts = {})
98
- stream_source = Saxon::JAXP::StreamSource.new(uri.to_s)
99
- stream_source.setSystemId(opts[:base_uri]) if opts[:base_uri]
100
- new(stream_source)
101
- end
170
+ # Generate a Saxon::Source given a URI
171
+ #
172
+ # @param [String, URI] uri The URI to the XML file to be parsed
173
+ # @param [Hash] opts
174
+ # @option opts [String] :base_uri The Base URI for the Source - an
175
+ # absolute URI or relative path that will be used to resolve relative
176
+ # URLs in the XML. Setting this will override the given URI.
177
+ # @option opts [String, Encoding] :encoding The encoding of the source.
178
+ # Note that specifying this will force the parser to ignore the charset
179
+ # if it's set in the XML declaration of the source. Only really useful
180
+ # if there's a discrepancy between the source's declared and actual
181
+ # encoding. Defaults to the <?xml encoding="..."?> declaration in the
182
+ # source.
183
+ # @return [Saxon::Source] the Saxon::Source wrapping the input
184
+ def from_uri(uri, opts = {})
185
+ encoding = opts.fetch(:encoding, nil)
186
+ return from_io(open(uri), encoding: encoding) if encoding
187
+ from_inputstream_or_reader(uri.to_s, opts[:base_uri])
188
+ end
102
189
 
103
- # Generate a Saxon::Source given a string containing XML
104
- #
105
- # @param [String] string The string containing XML to be parsed
106
- # @param [Hash] opts
107
- # @option opts [String] :base_uri The Base URI for the Source - an
108
- # absolute URI or relative path that will be used to resolve relative
109
- # URLs in the XML. This will be nil unless set.
110
- # @return [Saxon::Source] the Saxon::Source wrapping the input
111
- def self.from_string(string, opts = {})
112
- reader = java.io.StringReader.new(string)
113
- stream_source = Saxon::JAXP::StreamSource.new(reader)
114
- stream_source.setSystemId(opts[:base_uri]) if opts[:base_uri]
115
- new(stream_source, reader)
116
- end
190
+ # Generate a Saxon::Source given a string containing XML
191
+ #
192
+ # @param [String] string The string containing XML to be parsed
193
+ # @param [Hash] opts
194
+ # @option opts [String] :base_uri The Base URI for the Source - an
195
+ # absolute URI or relative path that will be used to resolve relative
196
+ # URLs in the XML. This will be nil unless set.
197
+ # @option opts [String, Encoding] :encoding The encoding of the source.
198
+ # Note that specifying this will force the parser to ignore the charset
199
+ # if it's set in the XML declaration of the source. Only really useful
200
+ # if there's a discrepancy between the encoding of the string and the
201
+ # encoding of the source. Defaults to the encoding of the string, unless
202
+ # that is ASCII-8BIT, in which case the parser will use the
203
+ # <?xml encoding="..."?> declaration in the source to pick the encoding.
204
+ # @return [Saxon::Source] the Saxon::Source wrapping the input
205
+ def from_string(string, opts = {})
206
+ encoding = opts.fetch(:encoding) { string.encoding }
207
+ reader = Helpers.string_reader(string, encoding)
208
+ from_inputstream_or_reader(reader, opts[:base_uri])
209
+ end
210
+
211
+ # Generate a Saxon::Source from one of the several inputs allowed.
212
+ #
213
+ # If possible the character encoding of the input source will be left to
214
+ # the XML parser to discover (from the <tt><?xml encoding="..."?></tt> XML
215
+ # declaration).
216
+ #
217
+ # The Base URI for the source (its absolute path, or URI) can be set by
218
+ # passing in the +:base_uri+ option. This is the same thing as an XML
219
+ # document's 'System ID' - Base URI is the term most widely used in Ruby
220
+ # libraries for this, so that's what's used here.
221
+ #
222
+ # If the source's character encoding can't be correctly discovered by the
223
+ # parser from the XML declaration (<tt><?xml version="..."
224
+ # encoding="..."?></tt> at the top of the document), then it can be passed
225
+ # as the +:encoding+ option.
226
+ #
227
+ # If an existing {Source} is passed in, simply return it.
228
+ #
229
+ # @param [Saxon::Source, IO, File, String, Pathname, URI] input The XML to be parsed
230
+ # @param [Hash] opts
231
+ # @option opts [String] :base_uri The Base URI for the Source - an
232
+ # absolute URI or relative path that will be used to resolve relative
233
+ # URLs in the XML. Setting this will override any path or URI derived
234
+ # from an IO, URI, or Path.
235
+ # @option opts [String, Encoding] :encoding The encoding of the source.
236
+ # Note that specifying this will force the parser to ignore the charset
237
+ # if it's set in the XML declaration of the source. Only really useful
238
+ # if there's a discrepancy between the source's declared and actual
239
+ # encoding. Defaults to the <?xml encoding="..."?> declaration in the
240
+ # source.
241
+ # @return [Saxon::Source] the Saxon::Source wrapping the input
242
+ def create(input, opts = {})
243
+ case input
244
+ when Saxon::Source
245
+ input
246
+ when IO, File, java.io.InputStream, StringIO
247
+ from_io(input, opts)
248
+ when Pathname, PathChecker
249
+ from_path(input, opts)
250
+ when URIChecker
251
+ from_uri(input, opts)
252
+ else
253
+ from_string(input, opts)
254
+ end
255
+ end
256
+
257
+ private
117
258
 
118
- def self.create(io_path_uri_or_string, opts = {})
119
- case io_path_uri_or_string
120
- when IO, File, java.io.InputStream, StringIO
121
- from_io(io_path_uri_or_string, opts)
122
- when Pathname, PathChecker
123
- from_path(io_path_uri_or_string, opts)
124
- when URIChecker
125
- from_uri(io_path_uri_or_string, opts)
126
- else
127
- from_string(io_path_uri_or_string, opts)
259
+ def from_inputstream_or_reader(inputstream_or_reader, base_uri = nil)
260
+ stream_source = Saxon::JAXP::StreamSource.new(inputstream_or_reader)
261
+ stream_source.setSystemId(base_uri) if base_uri
262
+ new(stream_source, inputstream_or_reader)
128
263
  end
129
264
  end
130
265
 
@@ -183,5 +318,6 @@ module Saxon
183
318
  end
184
319
  end
185
320
 
321
+ # Error raised when trying to consume an already-consumed, and closed, Source
186
322
  class SourceClosedError < Exception; end
187
323
  end
data/lib/saxon/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Saxon
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/saxon/xdm.rb CHANGED
@@ -9,24 +9,31 @@ require_relative 'xdm/empty_sequence'
9
9
  require_relative 'xdm/item'
10
10
 
11
11
  module Saxon
12
+ # Classes for representing, creating, and working with the XPath Data Model
13
+ # type system used in XPath 2+, XSLT 2+, and XQuery.
12
14
  module XDM
13
15
  class << self
16
+ # Convenience function for creating a new {AtomicValue}. See {AtomicValue.create}
14
17
  def AtomicValue(*args)
15
18
  XDM::AtomicValue.create(*args)
16
19
  end
17
20
 
21
+ # Convenience function for creating a new {Value}. See {Value.create}
18
22
  def Value(*args)
19
23
  XDM::Value.create(*args)
20
24
  end
21
25
 
26
+ # Returns the XDM {EmptySequence}. See {EmptySequence.create}
22
27
  def EmptySequence()
23
28
  XDM::EmptySequence.create
24
29
  end
25
30
 
31
+ # Convenience function for creating a new {Array}. See {Array.create}
26
32
  def Array(*args)
27
33
  XDM::Array.create(*args)
28
34
  end
29
35
 
36
+ # Convenience function for creating a new {Map}. See {Map.create}
30
37
  def Map(*args)
31
38
  XDM::Map.create(*args)
32
39
  end
@@ -5,6 +5,11 @@ module Saxon
5
5
  module XDM
6
6
  # Represents an XDM Array
7
7
  class Array
8
+ # Create a new {XDM::Array} from a Ruby Array. The contents of the array
9
+ # will be converted to {XDM::Value}s using {XDM.Value()}. An existing
10
+ # {S9API::XdmArray} will simply be wrapped and returned.
11
+ #
12
+ # @return [XDM::Array] the new XDM Array
8
13
  def self.create(array)
9
14
  case array
10
15
  when S9API::XdmArray
@@ -28,14 +33,19 @@ module Saxon
28
33
  @s9_xdm_array = s9_xdm_array
29
34
  end
30
35
 
36
+ # Iterate over the Array, yielding each element.
37
+ # @yieldparam value [XDM::Value] the current value from the Array
31
38
  def each(&block)
32
39
  cached_array.each(&block)
33
40
  end
34
41
 
42
+ # Fetch element at index +i+ in the array.
43
+ # @param i [Integer] the index of the element to retrieve.
35
44
  def [](i)
36
45
  cached_array[i]
37
46
  end
38
47
 
48
+ # @return [Integer] the length of the array
39
49
  def length
40
50
  s9_xdm_array.arrayLength
41
51
  end
@@ -53,16 +63,22 @@ module Saxon
53
63
  cached_array == other.to_a
54
64
  end
55
65
 
66
+ # Return a (frozen) Ruby {::Array} containing all the elements of the {XDM::Array}
56
67
  def to_a
57
68
  cached_array
58
69
  end
59
70
 
60
71
  alias_method :eql?, :==
61
72
 
73
+ # Compute a hash-code for this {Array}.
74
+ #
75
+ # Two {XDM::Array}s with the same content will have the same hash code (and will compare as equal using eql?).
76
+ # @see Object#hash
62
77
  def hash
63
78
  @hash ||= cached_array.hash
64
79
  end
65
80
 
81
+ # @return the underlying Java XdmArray
66
82
  def to_java
67
83
  s9_xdm_array
68
84
  end
@@ -34,14 +34,16 @@ module Saxon
34
34
  end
35
35
  end
36
36
 
37
+ # ItemType representing QNames
37
38
  XS_QNAME = ItemType.get_type('xs:QName')
39
+ # ItemType representing NOTATION
38
40
  XS_NOTATION = ItemType.get_type('xs:NOTATION')
39
41
 
40
42
  class << self
41
43
  # Convert a single Ruby value into an XDM::AtomicValue
42
44
  #
43
45
  # If no explicit {ItemType} is passed, the correct type is guessed based
44
- # on the class of the value. (e.g. <tt>xs:date</tt> for {Date}.)
46
+ # on the class of the value. (e.g. <tt>xs:date</tt> for {::Date}.)
45
47
  #
46
48
  # Values are converted based on Ruby idioms and operations, so an explicit
47
49
  # {ItemType} of <tt>xs:boolean</tt> will use truthyness to evaluate the
@@ -165,6 +167,10 @@ module Saxon
165
167
 
166
168
  alias_method :eql?, :==
167
169
 
170
+ # Compute a hash-code for this {AtomicValue}.
171
+ #
172
+ # Two {AtomicValue}s with the same content will have the same hash code (and will compare as equal using eql?).
173
+ # @see Object#hash
168
174
  def hash
169
175
  @hash ||= s9_xdm_atomic_value.hashCode
170
176
  end