saxon-rb 0.4.0-java → 0.5.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +429 -42
  3. data/Gemfile +2 -2
  4. data/README.md +317 -10
  5. data/Rakefile +237 -7
  6. data/lib/net/sf/saxon/Saxon-HE/{9.9.1-5/Saxon-HE-9.9.1-5.jar → 9.9.1-6/Saxon-HE-9.9.1-6.jar} +0 -0
  7. data/lib/saxon-rb.rb +1 -0
  8. data/lib/{saxon_jars.rb → saxon-rb_jars.rb} +2 -2
  9. data/lib/saxon.rb +13 -0
  10. data/lib/saxon/axis_iterator.rb +8 -1
  11. data/lib/saxon/configuration.rb +1 -0
  12. data/lib/saxon/item_type.rb +12 -17
  13. data/lib/saxon/item_type/lexical_string_conversion.rb +136 -58
  14. data/lib/saxon/item_type/value_to_ruby.rb +13 -0
  15. data/lib/saxon/loader.rb +4 -1
  16. data/lib/saxon/nokogiri.rb +78 -0
  17. data/lib/saxon/occurrence_indicator.rb +32 -3
  18. data/lib/saxon/processor.rb +32 -1
  19. data/lib/saxon/qname.rb +37 -2
  20. data/lib/saxon/s9api.rb +5 -0
  21. data/lib/saxon/sequence_type.rb +131 -0
  22. data/lib/saxon/source.rb +207 -71
  23. data/lib/saxon/version.rb +1 -1
  24. data/lib/saxon/xdm.rb +7 -0
  25. data/lib/saxon/xdm/array.rb +16 -0
  26. data/lib/saxon/xdm/atomic_value.rb +7 -1
  27. data/lib/saxon/xdm/empty_sequence.rb +13 -0
  28. data/lib/saxon/xdm/external_object.rb +1 -0
  29. data/lib/saxon/xdm/function_item.rb +1 -0
  30. data/lib/saxon/xdm/item.rb +7 -0
  31. data/lib/saxon/xdm/map.rb +38 -0
  32. data/lib/saxon/xdm/node.rb +19 -1
  33. data/lib/saxon/xdm/sequence_like.rb +15 -0
  34. data/lib/saxon/xdm/value.rb +21 -5
  35. data/lib/saxon/xpath.rb +9 -0
  36. data/lib/saxon/xpath/compiler.rb +36 -1
  37. data/lib/saxon/xpath/executable.rb +53 -28
  38. data/lib/saxon/xpath/static_context.rb +19 -39
  39. data/lib/saxon/xpath/variable_declaration.rb +16 -49
  40. data/lib/saxon/xslt.rb +12 -0
  41. data/lib/saxon/xslt/compiler.rb +75 -6
  42. data/lib/saxon/xslt/evaluation_context.rb +19 -3
  43. data/lib/saxon/xslt/executable.rb +204 -14
  44. data/saxon-rb.gemspec +1 -1
  45. metadata +9 -7
  46. data/saxon.gemspec +0 -30
data/lib/saxon/source.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'java'
2
2
  require 'saxon/jaxp'
3
3
  require 'uri'
4
+ require 'open-uri'
4
5
  require 'pathname'
5
6
 
6
7
  module Saxon
@@ -8,10 +9,13 @@ module Saxon
8
9
  # the XML bytestream in. Provides some extra methods to make handling closing
9
10
  # the source and its inputstream after consumption more idiomatic
10
11
  class Source
12
+ # Helper methods for getting Java-useful representations of source document
13
+ # strings and files
11
14
  module Helpers
12
15
  # Given a File, or IO object which will return either #path or
13
16
  # #base_uri, return the #base_uri, if present, or the #path, if present, or
14
17
  # nil
18
+ #
15
19
  # @param [File, IO] io A File or IO
16
20
  # object representing the input XML file or data, or a String containing
17
21
  # the XML
@@ -23,30 +27,92 @@ module Saxon
23
27
  io.path if io.respond_to?(:path)
24
28
  end
25
29
 
26
- # Given a File or IO return a Java InputStream
27
- # @param [File, IO, org.jruby.util.IOInputStream, java.io.InputStream]
28
- # io input to be converted to an input stream
30
+ # Given a File or IO return a Java InputStream, or an InputStreamReader if
31
+ # the Encoding is explicitly specified (rather than inferred from the
32
+ # <?xml encoding="..."?> declaration in the source).
33
+ #
34
+ # @param io [File, IO, org.jruby.util.IOInputStream, java.io.InputStream]
35
+ # input to be converted to an input stream
36
+ # @param encoding [Encoding, String] the character encoding to be used
37
+ # for the stream, overriding the XML parser.
29
38
  # @return [java.io.InputStream, java.io.InputStreamReader] the wrapped input (a Reader when an encoding is given)
30
- def self.inputstream(io)
31
- case io
39
+ def self.inputstream(io, encoding = nil)
40
+ stream = case io
32
41
  when org.jruby.util.IOInputStream, java.io.InputStream
33
42
  io
34
43
  else
35
44
  io.to_inputstream if io.respond_to?(:read)
36
45
  end
46
+
47
+ return stream if encoding.nil?
48
+ java.io.InputStreamReader.new(stream, ruby_encoding_to_charset(encoding))
37
49
  end
38
50
 
39
51
  # Given a path return a Java File object
40
- # @param [String, Pathname] path the path to the file
52
+ #
53
+ # @param path [String, Pathname] the path to the file
41
54
  # @return [java.io.File] the Java File object
42
55
  def self.file(path)
43
56
  java.io.File.new(path.to_s)
44
57
  end
58
+
59
+ # Given a file path and encoding, return a Java InputStreamReader object
60
+ # for the file.
61
+ #
62
+ # @param path [String, Pathname] the path to the file
63
+ # @param encoding [String, Encoding] the file's character encoding
64
+ # @return [java.io.InputStreamReader] a Java InputStreamReader object
65
+ # wrapping a FileInputStream for the file
66
+ def self.file_reader(path, encoding)
67
+ java.io.InputStreamReader.new(java.io.FileInputStream.new(file(path)), ruby_encoding_to_charset(encoding))
68
+ end
69
+
70
+ # Return a File or Reader object for a file, depending on whether the
71
+ # encoding must be explicitly specified or not.
72
+ #
73
+ # @param path [String, Pathname] the path to the file
74
+ # @param encoding [String, Encoding] the file's character encoding
75
+ # @return [java.io.File, java.io.Reader] a Java File, or a Reader if an encoding is given
76
+ def self.file_or_reader(path, encoding = nil)
77
+ encoding.nil? ? file(path) : file_reader(path, encoding)
78
+ end
79
+
80
+ # Return a Reader object for the String with an explicitly set encoding.
81
+ # If the encoding is +ASCII_8BIT+ then a binary-mode InputStream is
82
+ # returned, rather than a character Reader
83
+ #
84
+ # @param string [String] the string
85
+ # @param encoding [String, Encoding] the string's character encoding
86
+ # @return [java.io.InputStream, java.io.Reader] a Java InputStream or Reader object
87
+ def self.string_reader(string, encoding)
88
+ inputstream = StringIO.new(string).to_inputstream
89
+ encoding = ruby_encoding(encoding)
90
+ return inputstream if encoding == ::Encoding::ASCII_8BIT
91
+ java.io.InputStreamReader.new(inputstream, ruby_encoding_to_charset(encoding))
92
+ end
93
+
94
+ # Figure out the equivalent Java +Charset+ for a Ruby {Encoding}.
95
+ #
96
+ # @param encoding [String, Encoding] the encoding to find a +Charset+ for
97
+ def self.ruby_encoding_to_charset(encoding)
98
+ ruby_encoding(encoding).to_java.getEncoding.getCharset
99
+ end
100
+
101
+ # Given a String with an {Encoding} name or an {Encoding} instance, return
102
+ # an {Encoding} instance
103
+ #
104
+ # @param encoding [String, Encoding] the encoding or encoding name
105
+ # @return [Encoding] the encoding
106
+ def self.ruby_encoding(encoding)
107
+ encoding.nil? ? nil : ::Encoding.find(encoding)
108
+ end
45
109
  end
46
110
 
111
+ # Lambda that checks if the given path exists and is a file
47
112
  PathChecker = ->(path) {
48
113
  File.file?(path)
49
114
  }
115
+ # Lambda that checks if the given string is a valid URI
50
116
  URIChecker = ->(uri) {
51
117
  begin
52
118
  URI.parse(uri)
@@ -56,75 +122,144 @@ module Saxon
56
122
  end
57
123
  }
58
124
 
59
- # Generate a Saxon::Source given an IO-like
60
- #
61
- # @param [IO, File] io The IO-like containing XML to be parsed
62
- # @param [Hash] opts
63
- # @option opts [String] :base_uri The Base URI for the Source - an
64
- # absolute URI or relative path that will be used to resolve relative
65
- # URLs in the XML. Setting this will override any path or URI derived
66
- # from the IO-like.
67
- # @return [Saxon::Source] the Saxon::Source wrapping the input
68
- def self.from_io(io, opts = {})
69
- base_uri = opts.fetch(:base_uri) { Helpers.base_uri(io) }
70
- inputstream = Helpers.inputstream(io)
71
- stream_source = Saxon::JAXP::StreamSource.new(inputstream, base_uri)
72
- new(stream_source, inputstream)
73
- end
125
+ class << self
126
+ # Generate a Saxon::Source given an IO-like
127
+ #
128
+ # @param [IO, File] io The IO-like containing XML to be parsed
129
+ # @param [Hash] opts
130
+ # @option opts [String] :base_uri The Base URI for the Source - an
131
+ # absolute URI or relative path that will be used to resolve relative
132
+ # URLs in the XML. Setting this will override any path or URI derived
133
+ # from the IO-like.
134
+ # @option opts [String, Encoding] :encoding The encoding of the source.
135
+ # Note that specifying this will force the parser to ignore the charset
136
+ # if it's set in the XML declaration of the source. Only really useful
137
+ # if there's a discrepancy between the source's declared and actual
138
+ # encoding. Defaults to the <?xml encoding="..."?> declaration in the
139
+ # source.
140
+ # @return [Saxon::Source] the Saxon::Source wrapping the input
141
+ def from_io(io, opts = {})
142
+ base_uri = opts.fetch(:base_uri) { Helpers.base_uri(io) }
143
+ encoding = opts.fetch(:encoding, nil)
144
+ inputstream = Helpers.inputstream(io, encoding)
145
+ from_inputstream_or_reader(inputstream, base_uri)
146
+ end
74
147
 
75
- # Generate a Saxon::Source given a path to a file
76
- #
77
- # @param [String, Pathname] path The path to the XML file to be parsed
78
- # @param [Hash] opts
79
- # @option opts [String] :base_uri The Base URI for the Source - an
80
- # absolute URI or relative path that will be used to resolve relative
81
- # URLs in the XML. Setting this will override the file path.
82
- # @return [Saxon::Source] the Saxon::Source wrapping the input
83
- def self.from_path(path, opts = {})
84
- stream_source = Saxon::JAXP::StreamSource.new(Helpers.file(path))
85
- stream_source.setSystemId(opts[:base_uri]) if opts[:base_uri]
86
- new(stream_source)
87
- end
148
+ # Generate a Saxon::Source given a path to a file
149
+ #
150
+ # @param [String, Pathname] path The path to the XML file to be parsed
151
+ # @param [Hash] opts
152
+ # @option opts [String] :base_uri The Base URI for the Source - an
153
+ # absolute URI or relative path that will be used to resolve relative
154
+ # URLs in the XML. Setting this will override the file path.
155
+ # @option opts [String, Encoding] :encoding The encoding of the source.
156
+ # Note that specifying this will force the parser to ignore the charset
157
+ # if it's set in the XML declaration of the source. Only really useful
158
+ # if there's a discrepancy between the source's declared and actual
159
+ # encoding. Defaults to the <?xml encoding="..."?> declaration in the
160
+ # source.
161
+ # @return [Saxon::Source] the Saxon::Source wrapping the input
162
+ def from_path(path, opts = {})
163
+ encoding = opts.fetch(:encoding, nil)
164
+ return from_inputstream_or_reader(Helpers.file(path), opts[:base_uri]) if encoding.nil?
165
+ reader = Helpers.file_reader(path, encoding)
166
+ base_uri = opts.fetch(:base_uri) { File.expand_path(path) }
167
+ from_inputstream_or_reader(reader, base_uri)
168
+ end
88
169
 
89
- # Generate a Saxon::Source given a URI
90
- #
91
- # @param [String, URI] uri The URI to the XML file to be parsed
92
- # @param [Hash] opts
93
- # @option opts [String] :base_uri The Base URI for the Source - an
94
- # absolute URI or relative path that will be used to resolve relative
95
- # URLs in the XML. Setting this will override the given URI.
96
- # @return [Saxon::Source] the Saxon::Source wrapping the input
97
- def self.from_uri(uri, opts = {})
98
- stream_source = Saxon::JAXP::StreamSource.new(uri.to_s)
99
- stream_source.setSystemId(opts[:base_uri]) if opts[:base_uri]
100
- new(stream_source)
101
- end
170
+ # Generate a Saxon::Source given a URI
171
+ #
172
+ # @param [String, URI] uri The URI to the XML file to be parsed
173
+ # @param [Hash] opts
174
+ # @option opts [String] :base_uri The Base URI for the Source - an
175
+ # absolute URI or relative path that will be used to resolve relative
176
+ # URLs in the XML. Setting this will override the given URI.
177
+ # @option opts [String, Encoding] :encoding The encoding of the source.
178
+ # Note that specifying this will force the parser to ignore the charset
179
+ # if it's set in the XML declaration of the source. Only really useful
180
+ # if there's a discrepancy between the source's declared and actual
181
+ # encoding. Defaults to the <?xml encoding="..."?> declaration in the
182
+ # source.
183
+ # @return [Saxon::Source] the Saxon::Source wrapping the input
184
+ def from_uri(uri, opts = {})
185
+ encoding = opts.fetch(:encoding, nil)
186
+ return from_io(open(uri), encoding: encoding) if encoding
187
+ from_inputstream_or_reader(uri.to_s, opts[:base_uri])
188
+ end
102
189
 
103
- # Generate a Saxon::Source given a string containing XML
104
- #
105
- # @param [String] string The string containing XML to be parsed
106
- # @param [Hash] opts
107
- # @option opts [String] :base_uri The Base URI for the Source - an
108
- # absolute URI or relative path that will be used to resolve relative
109
- # URLs in the XML. This will be nil unless set.
110
- # @return [Saxon::Source] the Saxon::Source wrapping the input
111
- def self.from_string(string, opts = {})
112
- reader = java.io.StringReader.new(string)
113
- stream_source = Saxon::JAXP::StreamSource.new(reader)
114
- stream_source.setSystemId(opts[:base_uri]) if opts[:base_uri]
115
- new(stream_source, reader)
116
- end
190
+ # Generate a Saxon::Source given a string containing XML
191
+ #
192
+ # @param [String] string The string containing XML to be parsed
193
+ # @param [Hash] opts
194
+ # @option opts [String] :base_uri The Base URI for the Source - an
195
+ # absolute URI or relative path that will be used to resolve relative
196
+ # URLs in the XML. This will be nil unless set.
197
+ # @option opts [String, Encoding] :encoding The encoding of the source.
198
+ # Note that specifying this will force the parser to ignore the charset
199
+ # if it's set in the XML declaration of the source. Only really useful
200
+ # if there's a discrepancy between the encoding of the string and the
201
+ # encoding of the source. Defaults to the encoding of the string, unless
202
+ # that is ASCII-8BIT, in which case the parser will use the
203
+ # <?xml encoding="..."?> declaration in the source to pick the encoding.
204
+ # @return [Saxon::Source] the Saxon::Source wrapping the input
205
+ def from_string(string, opts = {})
206
+ encoding = opts.fetch(:encoding) { string.encoding }
207
+ reader = Helpers.string_reader(string, encoding)
208
+ from_inputstream_or_reader(reader, opts[:base_uri])
209
+ end
210
+
211
+ # Generate a Saxon::Source from one of the several inputs allowed.
212
+ #
213
+ # If possible the character encoding of the input source will be left to
214
+ # the XML parser to discover (from the <tt><?xml encoding="..."?></tt> XML
215
+ # declaration).
216
+ #
217
+ # The Base URI for the source (its absolute path, or URI) can be set by
218
+ # passing in the +:base_uri+ option. This is the same thing as an XML
219
+ # document's 'System ID' - Base URI is the term most widely used in Ruby
220
+ # libraries for this, so that's what's used here.
221
+ #
222
+ # If the source's character encoding can't be correctly discovered by the
223
+ # parser from the XML declaration (<tt><?xml version="..."
224
+ # encoding="..."?></tt> at the top of the document), then it can be passed
225
+ # as the +:encoding+ option.
226
+ #
227
+ # If an existing {Source} is passed in, simply return it.
228
+ #
229
+ # @param [Saxon::Source, IO, File, String, Pathname, URI] input The XML to be parsed
230
+ # @param [Hash] opts
231
+ # @option opts [String] :base_uri The Base URI for the Source - an
232
+ # absolute URI or relative path that will be used to resolve relative
233
+ # URLs in the XML. Setting this will override any path or URI derived
234
+ # from an IO, URI, or Path.
235
+ # @option opts [String, Encoding] :encoding The encoding of the source.
236
+ # Note that specifying this will force the parser to ignore the charset
237
+ # if it's set in the XML declaration of the source. Only really useful
238
+ # if there's a discrepancy between the source's declared and actual
239
+ # encoding. Defaults to the <?xml encoding="..."?> declaration in the
240
+ # source.
241
+ # @return [Saxon::Source] the Saxon::Source wrapping the input
242
+ def create(input, opts = {})
243
+ case input
244
+ when Saxon::Source
245
+ input
246
+ when IO, File, java.io.InputStream, StringIO
247
+ from_io(input, opts)
248
+ when Pathname, PathChecker
249
+ from_path(input, opts)
250
+ when URIChecker
251
+ from_uri(input, opts)
252
+ else
253
+ from_string(input, opts)
254
+ end
255
+ end
256
+
257
+ private
117
258
 
118
- def self.create(io_path_uri_or_string, opts = {})
119
- case io_path_uri_or_string
120
- when IO, File, java.io.InputStream, StringIO
121
- from_io(io_path_uri_or_string, opts)
122
- when Pathname, PathChecker
123
- from_path(io_path_uri_or_string, opts)
124
- when URIChecker
125
- from_uri(io_path_uri_or_string, opts)
126
- else
127
- from_string(io_path_uri_or_string, opts)
259
+ def from_inputstream_or_reader(inputstream_or_reader, base_uri = nil)
260
+ stream_source = Saxon::JAXP::StreamSource.new(inputstream_or_reader)
261
+ stream_source.setSystemId(base_uri) if base_uri
262
+ new(stream_source, inputstream_or_reader)
128
263
  end
129
264
  end
130
265
 
@@ -183,5 +318,6 @@ module Saxon
183
318
  end
184
319
  end
185
320
 
321
+ # Error raised when trying to consume an already-consumed, and closed, Source
186
322
  class SourceClosedError < Exception; end
187
323
  end
data/lib/saxon/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Saxon
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/saxon/xdm.rb CHANGED
@@ -9,24 +9,31 @@ require_relative 'xdm/empty_sequence'
9
9
  require_relative 'xdm/item'
10
10
 
11
11
  module Saxon
12
+ # Classes for representing, creating, and working with the XPath Data Model
13
+ # type system used in XPath 2+, XSLT 2+, and XQuery.
12
14
  module XDM
13
15
  class << self
16
+ # Convenience function for creating a new {AtomicValue}. See {AtomicValue.create}
14
17
  def AtomicValue(*args)
15
18
  XDM::AtomicValue.create(*args)
16
19
  end
17
20
 
21
+ # Convenience function for creating a new {Value}. See {Value.create}
18
22
  def Value(*args)
19
23
  XDM::Value.create(*args)
20
24
  end
21
25
 
26
+ # Returns the XDM {EmptySequence}. See {EmptySequence.create}
22
27
  def EmptySequence()
23
28
  XDM::EmptySequence.create
24
29
  end
25
30
 
31
+ # Convenience function for creating a new {Array}. See {Array.create}
26
32
  def Array(*args)
27
33
  XDM::Array.create(*args)
28
34
  end
29
35
 
36
+ # Convenience function for creating a new {Map}. See {Map.create}
30
37
  def Map(*args)
31
38
  XDM::Map.create(*args)
32
39
  end
@@ -5,6 +5,11 @@ module Saxon
5
5
  module XDM
6
6
  # Represents an XDM Array
7
7
  class Array
8
+ # Create a new {XDM::Array} from a Ruby Array. The contents of the array
9
+ # will be converted to {XDM::Value}s using {XDM.Value()}. An existing
10
+ # {S9API::XdmArray} will simply be wrapped and returned.
11
+ #
12
+ # @return [XDM::Array] the new XDM Array
8
13
  def self.create(array)
9
14
  case array
10
15
  when S9API::XdmArray
@@ -28,14 +33,19 @@ module Saxon
28
33
  @s9_xdm_array = s9_xdm_array
29
34
  end
30
35
 
36
+ # Iterate over the Array, yielding each element.
37
+ # @yieldparam value [XDM::Value] the current value from the Array
31
38
  def each(&block)
32
39
  cached_array.each(&block)
33
40
  end
34
41
 
42
+ # Fetch element at index +i+ in the array.
43
+ # @param i [Integer] the index of the element to retrieve.
35
44
  def [](i)
36
45
  cached_array[i]
37
46
  end
38
47
 
48
+ # @return [Integer] the length of the array
39
49
  def length
40
50
  s9_xdm_array.arrayLength
41
51
  end
@@ -53,16 +63,22 @@ module Saxon
53
63
  cached_array == other.to_a
54
64
  end
55
65
 
66
+ # Return a (frozen) Ruby {::Array} containing all the elements of the {XDM::Array}
56
67
  def to_a
57
68
  cached_array
58
69
  end
59
70
 
60
71
  alias_method :eql?, :==
61
72
 
73
+ # Compute a hash-code for this {Array}.
74
+ #
75
+ # Two {XDM::Array}s with the same content will have the same hash code (and will compare using eql?).
76
+ # @see Object#hash
62
77
  def hash
63
78
  @hash ||= cached_array.hash
64
79
  end
65
80
 
81
+ # @return the underlying Java XdmArray
66
82
  def to_java
67
83
  s9_xdm_array
68
84
  end
@@ -34,14 +34,16 @@ module Saxon
34
34
  end
35
35
  end
36
36
 
37
+ # ItemType representing QNames
37
38
  XS_QNAME = ItemType.get_type('xs:QName')
39
+ # ItemType representing NOTATION
38
40
  XS_NOTATION = ItemType.get_type('xs:NOTATION')
39
41
 
40
42
  class << self
41
43
  # Convert a single Ruby value into an XDM::AtomicValue
42
44
  #
43
45
  # If no explicit {ItemType} is passed, the correct type is guessed based
44
- # on the class of the value. (e.g. <tt>xs:date</tt> for {Date}.)
46
+ # on the class of the value. (e.g. <tt>xs:date</tt> for {::Date}.)
45
47
  #
46
48
  # Values are converted based on Ruby idioms and operations, so an explicit
47
49
  # {ItemType} of <tt>xs:boolean</tt> will use truthyness to evaluate the
@@ -165,6 +167,10 @@ module Saxon
165
167
 
166
168
  alias_method :eql?, :==
167
169
 
170
+ # Compute a hash-code for this {AtomicValue}.
171
+ #
172
+ # Two {AtomicValue}s with the same content will have the same hash code (and will compare using eql?).
173
+ # @see Object#hash
168
174
  def hash
169
175
  @hash ||= s9_xdm_atomic_value.hashCode
170
176
  end