saxon-rb 0.4.0-java → 0.5.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +429 -42
  3. data/Gemfile +2 -2
  4. data/README.md +317 -10
  5. data/Rakefile +237 -7
  6. data/lib/net/sf/saxon/Saxon-HE/{9.9.1-5/Saxon-HE-9.9.1-5.jar → 9.9.1-6/Saxon-HE-9.9.1-6.jar} +0 -0
  7. data/lib/saxon-rb.rb +1 -0
  8. data/lib/{saxon_jars.rb → saxon-rb_jars.rb} +2 -2
  9. data/lib/saxon.rb +13 -0
  10. data/lib/saxon/axis_iterator.rb +8 -1
  11. data/lib/saxon/configuration.rb +1 -0
  12. data/lib/saxon/item_type.rb +12 -17
  13. data/lib/saxon/item_type/lexical_string_conversion.rb +136 -58
  14. data/lib/saxon/item_type/value_to_ruby.rb +13 -0
  15. data/lib/saxon/loader.rb +4 -1
  16. data/lib/saxon/nokogiri.rb +78 -0
  17. data/lib/saxon/occurrence_indicator.rb +32 -3
  18. data/lib/saxon/processor.rb +32 -1
  19. data/lib/saxon/qname.rb +37 -2
  20. data/lib/saxon/s9api.rb +5 -0
  21. data/lib/saxon/sequence_type.rb +131 -0
  22. data/lib/saxon/source.rb +207 -71
  23. data/lib/saxon/version.rb +1 -1
  24. data/lib/saxon/xdm.rb +7 -0
  25. data/lib/saxon/xdm/array.rb +16 -0
  26. data/lib/saxon/xdm/atomic_value.rb +7 -1
  27. data/lib/saxon/xdm/empty_sequence.rb +13 -0
  28. data/lib/saxon/xdm/external_object.rb +1 -0
  29. data/lib/saxon/xdm/function_item.rb +1 -0
  30. data/lib/saxon/xdm/item.rb +7 -0
  31. data/lib/saxon/xdm/map.rb +38 -0
  32. data/lib/saxon/xdm/node.rb +19 -1
  33. data/lib/saxon/xdm/sequence_like.rb +15 -0
  34. data/lib/saxon/xdm/value.rb +21 -5
  35. data/lib/saxon/xpath.rb +9 -0
  36. data/lib/saxon/xpath/compiler.rb +36 -1
  37. data/lib/saxon/xpath/executable.rb +53 -28
  38. data/lib/saxon/xpath/static_context.rb +19 -39
  39. data/lib/saxon/xpath/variable_declaration.rb +16 -49
  40. data/lib/saxon/xslt.rb +12 -0
  41. data/lib/saxon/xslt/compiler.rb +75 -6
  42. data/lib/saxon/xslt/evaluation_context.rb +19 -3
  43. data/lib/saxon/xslt/executable.rb +204 -14
  44. data/saxon-rb.gemspec +1 -1
  45. metadata +9 -7
  46. data/saxon.gemspec +0 -30
data/lib/saxon-rb.rb ADDED
@@ -0,0 +1 @@
1
+ require_relative 'saxon'
@@ -2,9 +2,9 @@
2
2
  begin
3
3
  require 'jar_dependencies'
4
4
  rescue LoadError
5
- require 'net/sf/saxon/Saxon-HE/9.9.1-5/Saxon-HE-9.9.1-5.jar'
5
+ require 'net/sf/saxon/Saxon-HE/9.9.1-6/Saxon-HE-9.9.1-6.jar'
6
6
  end
7
7
 
8
8
  if defined? Jars
9
- require_jar 'net.sf.saxon', 'Saxon-HE', '9.9.1-5'
9
+ require_jar 'net.sf.saxon', 'Saxon-HE', '9.9.1-6'
10
10
  end
data/lib/saxon.rb CHANGED
@@ -3,4 +3,17 @@ require_relative 'saxon/processor'
3
3
 
4
4
  # An idiomatic Ruby wrapper around the Saxon XML processing library.
5
5
  module Saxon
6
+ # Parse an XML document using a vanilla {DocumentBuilder} from the default
7
+ # {Processor}
8
+ # @see Saxon::Processor#XML
9
+ def self.XML(input, opts = {})
10
+ Processor.default.XML(input, opts)
11
+ end
12
+
13
+ # Compile an XSLT Stylesheet using a new {XSLT::Compiler} from the default
14
+ # {Processor}
15
+ # @see Saxon::Processor#XSLT
16
+ def self.XSLT(input, opts = {}, &block)
17
+ Processor.default.XSLT(input, opts, &block)
18
+ end
6
19
  end
@@ -1,7 +1,12 @@
1
1
  require_relative 's9api'
2
2
 
3
3
  module Saxon
4
- # An XPath Data Model Node object, representing an XML document, or an element or one of the other node chunks in the XDM.
4
+ # An iterator across an XPath axis of an XDM document, e.g. down to children
5
+ # (+child+), up to the root (+ancestor+)
6
+ # @example iterate over child nodes
7
+ # AxisIterator.new(node, :child).each do |child_node|
8
+ # puts child_node.node_name
9
+ # end
5
10
  class AxisIterator
6
11
  include Enumerable
7
12
 
@@ -18,6 +23,8 @@ module Saxon
18
23
  s9_sequence_iterator
19
24
  end
20
25
 
26
+ # yields each node in the sequence
27
+ # @yieldparam node [Saxon::XDM::Node] the next node in the sequence
21
28
  def each(&block)
22
29
  s9_sequence_iterator.lazy.map { |s9_xdm_node| Saxon::XDM::Node.new(s9_xdm_node) }.each(&block)
23
30
  end
@@ -10,6 +10,7 @@ module Saxon
10
10
  # for details of the constant names used to access the values
11
11
  class Configuration
12
12
  DEFAULT_SEMAPHORE = Mutex.new
13
+ private_constant :DEFAULT_SEMAPHORE
13
14
 
14
15
  # Provides a processor with default configuration. Essentially a singleton
15
16
  # instance
@@ -13,6 +13,7 @@ module Saxon
13
13
  @class_name = class_name
14
14
  end
15
15
 
16
+ # error message naming the Ruby class for which no XDM type equivalent was found
16
17
  def to_s
17
18
  "Ruby class <#{@class_name}> has no XDM type equivalent"
18
19
  end
@@ -26,29 +27,14 @@ module Saxon
26
27
  @type_str = type_str
27
28
  end
28
29
 
30
+ # error message including type string with no matching built-in type
29
31
  def to_s
30
32
  "'#{@type_str}' is not recognised as an XSD built-in type"
31
33
  end
32
34
  end
33
35
 
34
- class Factory
35
- DEFAULT_SEMAPHORE = Mutex.new
36
-
37
- attr_reader :processor
38
-
39
- def initialize(processor)
40
- @processor = processor
41
- end
42
-
43
- def s9_factory
44
- return @s9_factory if instance_variable_defined?(:@s9_factory)
45
- DEFAULT_SEMAPHORE.synchronize do
46
- @s9_factory = S9API::ItemTypeFactory.new(processor.to_java)
47
- end
48
- end
49
- end
50
-
51
36
  TYPE_CACHE_MUTEX = Mutex.new
37
+ private_constant :TYPE_CACHE_MUTEX
52
38
  # A mapping of Ruby types to XDM type constants
53
39
  TYPE_MAPPING = {
54
40
  'String' => :STRING,
@@ -61,6 +47,8 @@ module Saxon
61
47
  'Time' => :DATE_TIME,
62
48
  'BigDecimal' => :DECIMAL,
63
49
  'Integer' => :INTEGER,
50
+ 'Fixnum' => :INTEGER, # Fixnum/Bignum needed for JRuby 9.1/Ruby 2.3
51
+ 'Bignum' => :INTEGER,
64
52
  'Float' => :FLOAT,
65
53
  'Numeric' => :NUMERIC
66
54
  }.freeze
@@ -132,12 +120,14 @@ module Saxon
132
120
  Hash[QNAME_MAPPING.map { |qname, v| [qname.to_s, v] }]
133
121
  ).freeze
134
122
 
123
+ # convertors to generate lexical strings for a given {ItemType}, as a hash keyed on the ItemType
135
124
  ATOMIC_VALUE_LEXICAL_STRING_CONVERTORS = Hash[
136
125
  LexicalStringConversion::Convertors.constants.map { |const|
137
126
  [S9API::ItemType.const_get(const), LexicalStringConversion::Convertors.const_get(const)]
138
127
  }
139
128
  ].freeze
140
129
 
130
+ # convertors from {XDM::AtomicValue} to a Ruby primitive value, as a hash keyed on the ItemType
141
131
  ATOMIC_VALUE_TO_RUBY_CONVERTORS = Hash[
142
132
  ValueToRuby::Convertors.constants.map { |const|
143
133
  [S9API::ItemType.const_get(const), ValueToRuby::Convertors.const_get(const)]
@@ -155,6 +145,7 @@ module Saxon
155
145
  # @overload get_type(type_name)
156
146
  # Get the {ItemType} for the name
157
147
  # @param type_name [String] name of the built-in {ItemType} to fetch
148
+ # (e.g. +xs:string+ or +element()+)
158
149
  # @overload get_type(item_type)
159
150
  # Given an instance of {ItemType}, simply return the instance
160
151
  # @param item_type [Saxon::ItemType] an existing ItemType instance
@@ -182,6 +173,8 @@ module Saxon
182
173
 
183
174
  def get_s9_type(arg)
184
175
  case arg
176
+ when S9API::ItemType
177
+ arg
185
178
  when Saxon::QName
186
179
  get_s9_qname_mapped_type(arg)
187
180
  when Class
@@ -250,6 +243,8 @@ module Saxon
250
243
 
251
244
  alias_method :eql?, :==
252
245
 
246
+ # Return a hash code so this can be used as a key in a {::Hash}.
247
+ # @return [Fixnum] the hash code
253
248
  def hash
254
249
  @hash ||= s9_item_type.hashCode
255
250
  end
@@ -5,12 +5,26 @@ module Saxon
5
5
  # A collection of lambda-like objects for converting Ruby values into
6
6
  # lexical strings for specific XSD datatypes
7
7
  module LexicalStringConversion
8
+ # Simple validation helper that checks if a value string matches an
9
+ # allowed lexical string pattern space or not.
10
+ #
11
+ # @param value [Object] the value whose to_s representation should be
12
+ # checked
13
+ # @param item_type [Saxon::ItemType] the ItemType whose lexical pattern
14
+ # space should be checked against
15
+ # @param pattern [Regexp] the lexical pattern space Regexp to use in the
16
+ # checking
17
+ # @return [String] the lexical string for the value and type
18
+ # @raise [Errors::BadRubyValue] if the ruby value doesn't produce a string
19
+ # which validates against the allowed pattern
8
20
  def self.validate(value, item_type, pattern)
9
21
  str = value.to_s
10
- raise Errors::BadRubyValue.new(value, item_type) unless str.match?(pattern)
22
+ raise Errors::BadRubyValue.new(value, item_type) if str.match(pattern).nil?
11
23
  str
12
24
  end
13
25
 
26
+ # Helper class for performing conversion and validation to XDM integer
27
+ # types from Ruby's Fixnum/Bignum/Integer classes
14
28
  class IntegerConversion
15
29
  attr_reader :min, :max
16
30
 
@@ -18,20 +32,40 @@ module Saxon
18
32
  @min, @max = min, max
19
33
  end
20
34
 
35
+ # Returns whether the Ruby integer is within the range allowed for the
36
+ # XDM type
37
+ # @param integer_value [Integer] the ruby integer to check
38
+ # @return [Boolean] whether the value is within bounds
21
39
  def in_bounds?(integer_value)
22
40
  gte_min?(integer_value) && lte_max?(integer_value)
23
41
  end
24
42
 
43
+ # Returns whether the Ruby integer is >= the lower bound of the range
44
+ # allowed for the XDM type
45
+ # @param integer_value [Integer] the ruby integer to check
46
+ # @return [Boolean] whether the value is okay
25
47
  def gte_min?(integer_value)
26
48
  return true if min.nil?
27
49
  integer_value >= min
28
50
  end
29
51
 
52
+ # Returns whether the Ruby integer is <= the upper bound of the range
53
+ # allowed for the XDM type
54
+ # @param integer_value [Integer] the ruby integer to check
55
+ # @return [Boolean] whether the value is okay
30
56
  def lte_max?(integer_value)
31
57
  return true if max.nil?
32
58
  integer_value <= max
33
59
  end
34
60
 
61
+ # Check a value against our type constraints, and return the lexical
62
+ # string representation if it's okay.
63
+ #
64
+ # @param value [Integer] the ruby value
65
+ # @param item_type [Saxon::ItemType] the item type
66
+ # @return [String] the lexical string representation of the value
67
+ # @raise [Errors::RubyValueOutOfBounds] if the value is outside the
68
+ # type's permitted bounds
35
69
  def call(value, item_type)
36
70
  integer_value = case value
37
71
  when ::Numeric
@@ -44,15 +78,42 @@ module Saxon
44
78
  end
45
79
  end
46
80
 
81
+ # Helper class for performing conversion and validation to XDM
82
+ # Floating-point types from Ruby's Float class
47
83
  class FloatConversion
48
84
  def initialize(size = :double)
49
85
  @double = size == :double
50
86
  end
51
87
 
88
+ # Check a value against our type constraints, and return the lexical
89
+ # string representation if it's okay.
90
+ #
91
+ # @param value [Float] the ruby value
92
+ # @param item_type [Saxon::ItemType] the item type
93
+ # @return [String] the lexical string representation of the value
94
+ def call(value, item_type)
95
+ case value
96
+ when ::Float::INFINITY
97
+ 'INF'
98
+ when -::Float::INFINITY
99
+ '-INF'
100
+ when Numeric
101
+ float_value(value).to_s
102
+ else
103
+ LexicalStringConversion.validate(value, item_type, Patterns::FLOAT)
104
+ end
105
+ end
106
+
107
+ private
108
+
109
+ # Is this a double-precision XDM float?
110
+ # @return [Boolean] true if we're converting a double-precision float
52
111
  def double?
53
112
  @double
54
113
  end
55
114
 
115
+ # Return the float as either a double-precision or single-precision
116
+ # float as needed
56
117
  def float_value(float_value)
57
118
  return float_value if double?
58
119
  convert_to_single_precision(float_value)
@@ -61,30 +122,72 @@ module Saxon
61
122
  def convert_to_single_precision(float_value)
62
123
  [float_value].pack('f').unpack('f').first
63
124
  end
125
+ end
126
+
127
+ # Convert a value in seconds into an XDM Duration string
128
+ class DurationConversion
129
+ attr_reader :pattern
64
130
 
131
+ def initialize(pattern)
132
+ @pattern = pattern
133
+ end
134
+
135
+ # Produce a lexical Duration string from a numeric Ruby value
136
+ # representing seconds
65
137
  def call(value, item_type)
138
+ return numeric(value) if Numeric === value
139
+ LexicalStringConversion.validate(value, item_type, pattern)
140
+ end
141
+
142
+ private
143
+
144
+ def numeric(value)
145
+ sign = value.negative? ? '-' : ''
66
146
  case value
67
- when ::Float::INFINITY
68
- 'INF'
69
- when -::Float::INFINITY
70
- '-INF'
71
- when Numeric
72
- float_value(value).to_s
147
+ when Integer
148
+ "#{sign}PT#{value.abs}S"
149
+ when BigDecimal
150
+ "#{sign}PT#{value.abs.to_s('F')}S"
73
151
  else
74
- LexicalStringConversion.validate(value, item_type, Patterns::FLOAT)
152
+ sprintf("%sPT%0.9fS", sign, value.abs)
75
153
  end
76
154
  end
77
155
  end
78
156
 
157
+ # Helper class for creating convertors for the various G* Date-related
158
+ # types that allow single values (GDay, GMonth, GYear).
79
159
  class GDateConversion
80
160
  attr_reader :bounds, :integer_formatter, :validation_pattern
81
161
 
82
- def initialize(args = {})
162
+ # @param args [Hash]
163
+ # @option args [Range] :bounds the integer bounds for values of this type
164
+ # @option args [Regexp] :validation_pattern the pattern used to validate the
165
+ # value when it's a String not an Integer
166
+ # @option args [Proc] :integer_formatter a proc/lambda that will produce a
167
+ # correctly-formatted lexical string from an Integer value
168
+ def initialize(args = {})
83
169
  @bounds = args.fetch(:bounds)
84
170
  @validation_pattern = args.fetch(:validation_pattern)
85
171
  @integer_formatter = args.fetch(:integer_formatter)
86
172
  end
87
173
 
174
+ # @param value [String, Integer] the value to convert
175
+ # @param item_type [Saxon::ItemType] the type being converted to
176
+ # @return [String] a correctly formatted String
177
+ def call(value, item_type)
178
+ case value
179
+ when Integer
180
+ check_value_bounds!(value, item_type)
181
+ sprintf(integer_formatter.call(value), value)
182
+ else
183
+ formatted_value = LexicalStringConversion.validate(value, item_type, validation_pattern)
184
+ extract_and_check_value_bounds!(formatted_value, item_type)
185
+ formatted_value
186
+ end
187
+ end
188
+
189
+ private
190
+
88
191
  def extract_value_from_validated_format(formatted_value)
89
192
  Integer(formatted_value.gsub(validation_pattern, '\1'), 10)
90
193
  end
@@ -97,20 +200,27 @@ module Saxon
97
200
  def extract_and_check_value_bounds!(formatted_value, item_type)
98
201
  check_value_bounds!(extract_value_from_validated_format(formatted_value), item_type)
99
202
  end
203
+ end
204
+
205
+ # Convert Bytes. Idiomatically, Ruby uses +ASCII_8BIT+ encoded strings to
206
+ # represent bytes, and so a single character represents a single byte. XDM
207
+ # uses the decimal value of a signed or unsigned 8 bit integer
208
+ class ByteConversion
209
+ attr_reader :unpack_format
210
+
211
+ def initialize(kind = :signed)
212
+ @unpack_format = kind == :unsigned ? 'C' : 'c'
213
+ end
100
214
 
101
215
  def call(value, item_type)
102
- case value
103
- when Integer
104
- check_value_bounds!(value, item_type)
105
- sprintf(integer_formatter.call(value), value)
106
- else
107
- formatted_value = LexicalStringConversion.validate(value, item_type, validation_pattern)
108
- extract_and_check_value_bounds!(formatted_value, item_type)
109
- formatted_value
110
- end
216
+ raise Errors::RubyValueOutOfBounds.new(value, item_type) if value.bytesize != 1
217
+ value = value.to_s.force_encoding(Encoding::ASCII_8BIT)
218
+ value.unpack(unpack_format).first.to_s
111
219
  end
112
220
  end
113
221
 
222
+ # Pattern fragments that can be combined to help create the lexical space
223
+ # patterns in {Patterns}
114
224
  module PatternFragments
115
225
  TIME_DURATION = /(?:T
116
226
  (?:
@@ -132,6 +242,7 @@ module Saxon
132
242
  NAME_CHAR = ":|" + NCNAME_CHAR
133
243
  end
134
244
 
245
+ # A collection of lexical space patterns for XDM types
135
246
  module Patterns
136
247
  def self.build(*patterns)
137
248
  Regexp.new((['\A'] + patterns.map(&:to_s) + ['\z']).join(''))
@@ -159,6 +270,8 @@ module Saxon
159
270
  BASE64_BINARY = /\A(?:(?:[A-Za-z0-9+\/] ?){4})*(?:(?:[A-Za-z0-9+\/] ?){3}[A-Za-z0-9+\/]|(?:[A-Za-z0-9+\/] ?){2}[AEIMQUYcgkosw048] ?=|[A-Za-z0-9+\/] ?[AQgw] ?= ?=)?\z/
160
271
  end
161
272
 
273
+ # Convertors from Ruby values to lexical string representations for a
274
+ # particular XDM type
162
275
  module Convertors
163
276
  ANY_URI = ->(value, item_type) {
164
277
  uri_classes = [URI::Generic]
@@ -179,11 +292,7 @@ module Saxon
179
292
  BOOLEAN = ->(value, item_type) {
180
293
  value ? 'true' : 'false'
181
294
  }
182
- BYTE = ->(value, item_type) {
183
- raise Errors::RubyValueOutOfBounds.new(value, item_type) if value.bytesize != 1
184
- value = value.to_s.force_encoding(Encoding::ASCII_8BIT)
185
- value.unpack('c').first.to_s
186
- }
295
+ BYTE = ByteConversion.new
187
296
  DATE = ->(value, item_type) {
188
297
  if value.respond_to?(:strftime)
189
298
  value.strftime('%F')
@@ -202,21 +311,7 @@ module Saxon
202
311
  LexicalStringConversion.validate(value, item_type, Patterns::TIME)
203
312
  }
204
313
  DATE_TIME_STAMP = DATE_TIME
205
- DAY_TIME_DURATION = ->(value, item_type) {
206
- case value
207
- when Integer
208
- sign = value.negative? ? '-' : ''
209
- "#{sign}PT#{value.abs}S"
210
- when BigDecimal
211
- sign = value.negative? ? '-' : ''
212
- "#{sign}PT#{value.abs.to_s('F')}S"
213
- when Numeric
214
- sign = value.negative? ? '-' : ''
215
- sprintf("%sPT%0.9fS", sign, value.abs)
216
- else
217
- LexicalStringConversion.validate(value, item_type, Patterns::DAY_TIME_DURATION)
218
- end
219
- }
314
+ DAY_TIME_DURATION = DurationConversion.new(Patterns::DAY_TIME_DURATION)
220
315
  DECIMAL = ->(value, item_type) {
221
316
  case value
222
317
  when ::Integer
@@ -230,21 +325,7 @@ module Saxon
230
325
  end
231
326
  }
232
327
  DOUBLE = FloatConversion.new(:single)
233
- DURATION = ->(value, item_type) {
234
- case value
235
- when Integer
236
- sign = value.negative? ? '-' : ''
237
- "#{sign}PT#{value.abs}S"
238
- when BigDecimal
239
- sign = value.negative? ? '-' : ''
240
- "#{sign}PT#{value.abs.to_s('F')}S"
241
- when Numeric
242
- sign = value.negative? ? '-' : ''
243
- sprintf("%sPT%0.9fS", sign, value.abs)
244
- else
245
- LexicalStringConversion.validate(value, item_type, Patterns::DURATION)
246
- end
247
- }
328
+ DURATION = DurationConversion.new(Patterns::DURATION)
248
329
  FLOAT = FloatConversion.new
249
330
  G_DAY = GDateConversion.new({
250
331
  bounds: 1..31,
@@ -325,11 +406,7 @@ module Saxon
325
406
  TOKEN = ->(value, item_type) {
326
407
  LexicalStringConversion.validate(value, item_type, Patterns::TOKEN)
327
408
  }
328
- UNSIGNED_BYTE = ->(value, item_type) {
329
- raise Errors::RubyValueOutOfBounds.new(value, item_type) if value.bytesize != 1
330
- value = value.to_s.force_encoding(Encoding::ASCII_8BIT)
331
- value.unpack('C').first.to_s
332
- }
409
+ UNSIGNED_BYTE = ByteConversion.new(:unsigned)
333
410
  UNSIGNED_INT = IntegerConversion.new(0, 4294967295)
334
411
  UNSIGNED_LONG = IntegerConversion.new(0, 18446744073709551615)
335
412
  UNSIGNED_SHORT = IntegerConversion.new(0, 65535)
@@ -341,6 +418,7 @@ module Saxon
341
418
  }
342
419
  end
343
420
 
421
+ # Conversion process error classes
344
422
  module Errors
345
423
  # Raised during conversion from Ruby value to XDM Type lexical string
346
424
  # when the ruby value does not conform to the Type's string