saxon-rb 0.4.0-java → 0.5.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +429 -42
  3. data/Gemfile +2 -2
  4. data/README.md +317 -10
  5. data/Rakefile +237 -7
  6. data/lib/net/sf/saxon/Saxon-HE/{9.9.1-5/Saxon-HE-9.9.1-5.jar → 9.9.1-6/Saxon-HE-9.9.1-6.jar} +0 -0
  7. data/lib/saxon-rb.rb +1 -0
  8. data/lib/{saxon_jars.rb → saxon-rb_jars.rb} +2 -2
  9. data/lib/saxon.rb +13 -0
  10. data/lib/saxon/axis_iterator.rb +8 -1
  11. data/lib/saxon/configuration.rb +1 -0
  12. data/lib/saxon/item_type.rb +12 -17
  13. data/lib/saxon/item_type/lexical_string_conversion.rb +136 -58
  14. data/lib/saxon/item_type/value_to_ruby.rb +13 -0
  15. data/lib/saxon/loader.rb +4 -1
  16. data/lib/saxon/nokogiri.rb +78 -0
  17. data/lib/saxon/occurrence_indicator.rb +32 -3
  18. data/lib/saxon/processor.rb +32 -1
  19. data/lib/saxon/qname.rb +37 -2
  20. data/lib/saxon/s9api.rb +5 -0
  21. data/lib/saxon/sequence_type.rb +131 -0
  22. data/lib/saxon/source.rb +207 -71
  23. data/lib/saxon/version.rb +1 -1
  24. data/lib/saxon/xdm.rb +7 -0
  25. data/lib/saxon/xdm/array.rb +16 -0
  26. data/lib/saxon/xdm/atomic_value.rb +7 -1
  27. data/lib/saxon/xdm/empty_sequence.rb +13 -0
  28. data/lib/saxon/xdm/external_object.rb +1 -0
  29. data/lib/saxon/xdm/function_item.rb +1 -0
  30. data/lib/saxon/xdm/item.rb +7 -0
  31. data/lib/saxon/xdm/map.rb +38 -0
  32. data/lib/saxon/xdm/node.rb +19 -1
  33. data/lib/saxon/xdm/sequence_like.rb +15 -0
  34. data/lib/saxon/xdm/value.rb +21 -5
  35. data/lib/saxon/xpath.rb +9 -0
  36. data/lib/saxon/xpath/compiler.rb +36 -1
  37. data/lib/saxon/xpath/executable.rb +53 -28
  38. data/lib/saxon/xpath/static_context.rb +19 -39
  39. data/lib/saxon/xpath/variable_declaration.rb +16 -49
  40. data/lib/saxon/xslt.rb +12 -0
  41. data/lib/saxon/xslt/compiler.rb +75 -6
  42. data/lib/saxon/xslt/evaluation_context.rb +19 -3
  43. data/lib/saxon/xslt/executable.rb +204 -14
  44. data/saxon-rb.gemspec +1 -1
  45. metadata +9 -7
  46. data/saxon.gemspec +0 -30
data/lib/saxon-rb.rb ADDED
@@ -0,0 +1 @@
1
+ require_relative 'saxon'
@@ -2,9 +2,9 @@
2
2
  begin
3
3
  require 'jar_dependencies'
4
4
  rescue LoadError
5
- require 'net/sf/saxon/Saxon-HE/9.9.1-5/Saxon-HE-9.9.1-5.jar'
5
+ require 'net/sf/saxon/Saxon-HE/9.9.1-6/Saxon-HE-9.9.1-6.jar'
6
6
  end
7
7
 
8
8
  if defined? Jars
9
- require_jar 'net.sf.saxon', 'Saxon-HE', '9.9.1-5'
9
+ require_jar 'net.sf.saxon', 'Saxon-HE', '9.9.1-6'
10
10
  end
data/lib/saxon.rb CHANGED
@@ -3,4 +3,17 @@ require_relative 'saxon/processor'
3
3
 
4
4
  # An idiomatic Ruby wrapper around the Saxon XML processing library.
5
5
  module Saxon
6
+ # Parse an XML document using a vanilla {DocumentBuilder} from the default
7
+ # {Processor}
8
+ # @see Saxon::Processor#XML
9
+ def self.XML(input, opts = {})
10
+ Processor.default.XML(input, opts)
11
+ end
12
+
13
+ # Compile an XSLT Stylesheet using a new {XSLT::Compiler} from the default
14
+ # {Processor}
15
+ # @see Saxon::Processor#XSLT
16
+ def self.XSLT(input, opts = {}, &block)
17
+ Processor.default.XSLT(input, opts, &block)
18
+ end
6
19
  end
@@ -1,7 +1,12 @@
1
1
  require_relative 's9api'
2
2
 
3
3
  module Saxon
4
- # An XPath Data Model Node object, representing an XML document, or an element or one of the other node chunks in the XDM.
4
+ # An iterator across an XPath axis of an XDM document, e.g. down to children
5
+ # (+child+), up to the root (+ancestor+)
6
+ # @example iterate over child nodes
7
+ # AxisIterator.new(node, :child).each do |child_node|
8
+ # puts child_node.node_name
9
+ # end
5
10
  class AxisIterator
6
11
  include Enumerable
7
12
 
@@ -18,6 +23,8 @@ module Saxon
18
23
  s9_sequence_iterator
19
24
  end
20
25
 
26
+ # yields each node in the sequence
27
+ # @yieldparam node [Saxon::XDM::Node] the next node in the sequence
21
28
  def each(&block)
22
29
  s9_sequence_iterator.lazy.map { |s9_xdm_node| Saxon::XDM::Node.new(s9_xdm_node) }.each(&block)
23
30
  end
@@ -10,6 +10,7 @@ module Saxon
10
10
  # for details of the constant names used to access the values
11
11
  class Configuration
12
12
  DEFAULT_SEMAPHORE = Mutex.new
13
+ private_constant :DEFAULT_SEMAPHORE
13
14
 
14
15
  # Provides a processor with default configuration. Essentially a singleton
15
16
  # instance
@@ -13,6 +13,7 @@ module Saxon
13
13
  @class_name = class_name
14
14
  end
15
15
 
16
+ # error message naming the Ruby class for which no XDM type equivalent was found
16
17
  def to_s
17
18
  "Ruby class <#{@class_name}> has no XDM type equivalent"
18
19
  end
@@ -26,29 +27,14 @@ module Saxon
26
27
  @type_str = type_str
27
28
  end
28
29
 
30
+ # error message including type string with no matching built-in type
29
31
  def to_s
30
32
  "'#{@type_str}' is not recognised as an XSD built-in type"
31
33
  end
32
34
  end
33
35
 
34
- class Factory
35
- DEFAULT_SEMAPHORE = Mutex.new
36
-
37
- attr_reader :processor
38
-
39
- def initialize(processor)
40
- @processor = processor
41
- end
42
-
43
- def s9_factory
44
- return @s9_factory if instance_variable_defined?(:@s9_factory)
45
- DEFAULT_SEMAPHORE.synchronize do
46
- @s9_factory = S9API::ItemTypeFactory.new(processor.to_java)
47
- end
48
- end
49
- end
50
-
51
36
  TYPE_CACHE_MUTEX = Mutex.new
37
+ private_constant :TYPE_CACHE_MUTEX
52
38
  # A mapping of Ruby types to XDM type constants
53
39
  TYPE_MAPPING = {
54
40
  'String' => :STRING,
@@ -61,6 +47,8 @@ module Saxon
61
47
  'Time' => :DATE_TIME,
62
48
  'BigDecimal' => :DECIMAL,
63
49
  'Integer' => :INTEGER,
50
+ 'Fixnum' => :INTEGER, # Fixnum/Bignum needed for JRuby 9.1/Ruby 2.3
51
+ 'Bignum' => :INTEGER,
64
52
  'Float' => :FLOAT,
65
53
  'Numeric' => :NUMERIC
66
54
  }.freeze
@@ -132,12 +120,14 @@ module Saxon
132
120
  Hash[QNAME_MAPPING.map { |qname, v| [qname.to_s, v] }]
133
121
  ).freeze
134
122
 
123
+ # convertors to generate lexical strings for a given {ItemType}, as a hash keyed on the ItemType
135
124
  ATOMIC_VALUE_LEXICAL_STRING_CONVERTORS = Hash[
136
125
  LexicalStringConversion::Convertors.constants.map { |const|
137
126
  [S9API::ItemType.const_get(const), LexicalStringConversion::Convertors.const_get(const)]
138
127
  }
139
128
  ].freeze
140
129
 
130
+ # convertors from {XDM::AtomicValue} to a ruby primitive value, as a hash keyed on the ItemType
141
131
  ATOMIC_VALUE_TO_RUBY_CONVERTORS = Hash[
142
132
  ValueToRuby::Convertors.constants.map { |const|
143
133
  [S9API::ItemType.const_get(const), ValueToRuby::Convertors.const_get(const)]
@@ -155,6 +145,7 @@ module Saxon
155
145
  # @overload get_type(type_name)
156
146
  # Get the {ItemType} for the name
157
147
  # @param type_name [String] name of the built-in {ItemType} to fetch
148
+ # (e.g. +xs:string+ or +element()+)
158
149
  # @overload get_type(item_type)
159
150
  # Given an instance of {ItemType}, simply return the instance
160
151
  # @param item_type [Saxon::ItemType] an existing ItemType instance
@@ -182,6 +173,8 @@ module Saxon
182
173
 
183
174
  def get_s9_type(arg)
184
175
  case arg
176
+ when S9API::ItemType
177
+ arg
185
178
  when Saxon::QName
186
179
  get_s9_qname_mapped_type(arg)
187
180
  when Class
@@ -250,6 +243,8 @@ module Saxon
250
243
 
251
244
  alias_method :eql?, :==
252
245
 
246
+ # Return a hash code so this can be used as a key in a {::Hash}.
247
+ # @return [Fixnum] the hash code
253
248
  def hash
254
249
  @hash ||= s9_item_type.hashCode
255
250
  end
@@ -5,12 +5,26 @@ module Saxon
5
5
  # A collection of lambda-like objects for converting Ruby values into
6
6
  # lexical strings for specific XSD datatypes
7
7
  module LexicalStringConversion
8
+ # Simple validation helper that checks if a value string matches an
9
+ # allowed lexical string pattern space or not.
10
+ #
11
+ # @param value [Object] the value whose to_s representation should be
12
+ # checked
13
+ # @param item_type [Saxon::ItemType] the ItemType whose lexical pattern
14
+ # space should be checked against
15
+ # @param pattern [Regexp] the lexical pattern space Regexp to use in the
16
+ # checking
17
+ # @return [String] the lexical string for the value and type
18
+ # @raise [Errors::BadRubyValue] if the ruby value doesn't produce a string
19
+ # which validates against the allowed pattern
8
20
  def self.validate(value, item_type, pattern)
9
21
  str = value.to_s
10
- raise Errors::BadRubyValue.new(value, item_type) unless str.match?(pattern)
22
+ raise Errors::BadRubyValue.new(value, item_type) if str.match(pattern).nil?
11
23
  str
12
24
  end
13
25
 
26
+ # Helper class for performing conversion and validation to XDM integer
27
+ # types from Ruby's Fixnum/Bignum/Integer classes
14
28
  class IntegerConversion
15
29
  attr_reader :min, :max
16
30
 
@@ -18,20 +32,40 @@ module Saxon
18
32
  @min, @max = min, max
19
33
  end
20
34
 
35
+ # Returns whether the Ruby integer is within the range allowed for the
36
+ # XDM type
37
+ # @param integer_value [Integer] the ruby integer to check
38
+ # @return [Boolean] whether the value is within bounds
21
39
  def in_bounds?(integer_value)
22
40
  gte_min?(integer_value) && lte_max?(integer_value)
23
41
  end
24
42
 
43
+ # Returns whether the Ruby integer is >= the lower bound of the range
44
+ # allowed for the XDM type
45
+ # @param integer_value [Integer] the ruby integer to check
46
+ # @return [Boolean] whether the value is okay
25
47
  def gte_min?(integer_value)
26
48
  return true if min.nil?
27
49
  integer_value >= min
28
50
  end
29
51
 
52
+ # Returns whether the Ruby integer is <= the upper bound of the range
53
+ # allowed for the XDM type
54
+ # @param integer_value [Integer] the ruby integer to check
55
+ # @return [Boolean] whether the value is okay
30
56
  def lte_max?(integer_value)
31
57
  return true if max.nil?
32
58
  integer_value <= max
33
59
  end
34
60
 
61
+ # Check a value against our type constraints, and return the lexical
62
+ # string representation if it's okay.
63
+ #
64
+ # @param value [Integer] the ruby value
65
+ # @param item_type [Saxon::ItemType] the item type
66
+ # @return [String] the lexical string representation of the value
67
+ # @raise [Errors::RubyValueOutOfBounds] if the value is outside the
68
+ # type's permitted bounds
35
69
  def call(value, item_type)
36
70
  integer_value = case value
37
71
  when ::Numeric
@@ -44,15 +78,42 @@ module Saxon
44
78
  end
45
79
  end
46
80
 
81
+ # Helper class for performing conversion and validation to XDM
82
+ # Floating-point types from Ruby's Float class
47
83
  class FloatConversion
48
84
  def initialize(size = :double)
49
85
  @double = size == :double
50
86
  end
51
87
 
88
+ # Check a value against our type constraints, and return the lexical
89
+ # string representation if it's okay.
90
+ #
91
+ # @param value [Float] the ruby value
92
+ # @param item_type [Saxon::ItemType] the item type
93
+ # @return [String] the lexical string representation of the value
94
+ def call(value, item_type)
95
+ case value
96
+ when ::Float::INFINITY
97
+ 'INF'
98
+ when -::Float::INFINITY
99
+ '-INF'
100
+ when Numeric
101
+ float_value(value).to_s
102
+ else
103
+ LexicalStringConversion.validate(value, item_type, Patterns::FLOAT)
104
+ end
105
+ end
106
+
107
+ private
108
+
109
+ # Is this a double-precision XDM float?
110
+ # @return [Boolean] true if we're converting a double-precision float
52
111
  def double?
53
112
  @double
54
113
  end
55
114
 
115
+ # Return the float as either a double-precision or single-precision
116
+ # float as needed
56
117
  def float_value(float_value)
57
118
  return float_value if double?
58
119
  convert_to_single_precision(float_value)
@@ -61,30 +122,72 @@ module Saxon
61
122
  def convert_to_single_precision(float_value)
62
123
  [float_value].pack('f').unpack('f').first
63
124
  end
125
+ end
126
+
127
+ # Convert a value in seconds into an XDM Duration string
128
+ class DurationConversion
129
+ attr_reader :pattern
64
130
 
131
+ def initialize(pattern)
132
+ @pattern = pattern
133
+ end
134
+
135
+ # Produce a lexical Duration string from a numeric Ruby value
136
+ # representing seconds
65
137
  def call(value, item_type)
138
+ return numeric(value) if Numeric === value
139
+ LexicalStringConversion.validate(value, item_type, pattern)
140
+ end
141
+
142
+ private
143
+
144
+ def numeric(value)
145
+ sign = value.negative? ? '-' : ''
66
146
  case value
67
- when ::Float::INFINITY
68
- 'INF'
69
- when -::Float::INFINITY
70
- '-INF'
71
- when Numeric
72
- float_value(value).to_s
147
+ when Integer
148
+ "#{sign}PT#{value.abs}S"
149
+ when BigDecimal
150
+ "#{sign}PT#{value.abs.to_s('F')}S"
73
151
  else
74
- LexicalStringConversion.validate(value, item_type, Patterns::FLOAT)
152
+ sprintf("%sPT%0.9fS", sign, value.abs)
75
153
  end
76
154
  end
77
155
  end
78
156
 
157
+ # Helper class for creating convertors for the various G* Date-related
158
+ # types that allow single values (GDay, GMonth, GYear).
79
159
  class GDateConversion
80
160
  attr_reader :bounds, :integer_formatter, :validation_pattern
81
161
 
82
- def initialize(args = {})
162
+ # @param args [Hash]
163
+ # @option args [Range] :bounds the integer bounds for values of this type
164
+ # @option args [Regexp] :validation_pattern the pattern used to validate the
165
+ # value when it's a String not an Integer
166
+ # @option args [Proc] :integer_formatter a proc/lambda that will produce a
167
+ # correctly-formatted lexical string from an Integer value
168
+ def initialize(args = {})
83
169
  @bounds = args.fetch(:bounds)
84
170
  @validation_pattern = args.fetch(:validation_pattern)
85
171
  @integer_formatter = args.fetch(:integer_formatter)
86
172
  end
87
173
 
174
+ # @param value [String, Integer] the value to convert
175
+ # @param item_type [Saxon::ItemType] the type being converted to
176
+ # @return [String] a correctly formatted String
177
+ def call(value, item_type)
178
+ case value
179
+ when Integer
180
+ check_value_bounds!(value, item_type)
181
+ sprintf(integer_formatter.call(value), value)
182
+ else
183
+ formatted_value = LexicalStringConversion.validate(value, item_type, validation_pattern)
184
+ extract_and_check_value_bounds!(formatted_value, item_type)
185
+ formatted_value
186
+ end
187
+ end
188
+
189
+ private
190
+
88
191
  def extract_value_from_validated_format(formatted_value)
89
192
  Integer(formatted_value.gsub(validation_pattern, '\1'), 10)
90
193
  end
@@ -97,20 +200,27 @@ module Saxon
97
200
  def extract_and_check_value_bounds!(formatted_value, item_type)
98
201
  check_value_bounds!(extract_value_from_validated_format(formatted_value), item_type)
99
202
  end
203
+ end
204
+
205
+ # Convert Bytes. Idiomatically, Ruby uses +ASCII_8BIT+ encoded strings to
206
+ # represent bytes, and so a single character represents a single byte. XDM
207
+ # uses the decimal value of a signed or unsigned 8 bit integer
208
+ class ByteConversion
209
+ attr_reader :unpack_format
210
+
211
+ def initialize(kind = :signed)
212
+ @unpack_format = kind == :unsigned ? 'C' : 'c'
213
+ end
100
214
 
101
215
  def call(value, item_type)
102
- case value
103
- when Integer
104
- check_value_bounds!(value, item_type)
105
- sprintf(integer_formatter.call(value), value)
106
- else
107
- formatted_value = LexicalStringConversion.validate(value, item_type, validation_pattern)
108
- extract_and_check_value_bounds!(formatted_value, item_type)
109
- formatted_value
110
- end
216
+ raise Errors::RubyValueOutOfBounds.new(value, item_type) if value.bytesize != 1
217
+ value = value.to_s.force_encoding(Encoding::ASCII_8BIT)
218
+ value.unpack(unpack_format).first.to_s
111
219
  end
112
220
  end
113
221
 
222
+ # Pattern fragments that can be combined to help create the lexical space
223
+ # patterns in {Patterns}
114
224
  module PatternFragments
115
225
  TIME_DURATION = /(?:T
116
226
  (?:
@@ -132,6 +242,7 @@ module Saxon
132
242
  NAME_CHAR = ":|" + NCNAME_CHAR
133
243
  end
134
244
 
245
+ # A collection of lexical space patterns for XDM types
135
246
  module Patterns
136
247
  def self.build(*patterns)
137
248
  Regexp.new((['\A'] + patterns.map(&:to_s) + ['\z']).join(''))
@@ -159,6 +270,8 @@ module Saxon
159
270
  BASE64_BINARY = /\A(?:(?:[A-Za-z0-9+\/] ?){4})*(?:(?:[A-Za-z0-9+\/] ?){3}[A-Za-z0-9+\/]|(?:[A-Za-z0-9+\/] ?){2}[AEIMQUYcgkosw048] ?=|[A-Za-z0-9+\/] ?[AQgw] ?= ?=)?\z/
160
271
  end
161
272
 
273
+ # Convertors from Ruby values to lexical string representations for a
274
+ # particular XDM type
162
275
  module Convertors
163
276
  ANY_URI = ->(value, item_type) {
164
277
  uri_classes = [URI::Generic]
@@ -179,11 +292,7 @@ module Saxon
179
292
  BOOLEAN = ->(value, item_type) {
180
293
  value ? 'true' : 'false'
181
294
  }
182
- BYTE = ->(value, item_type) {
183
- raise Errors::RubyValueOutOfBounds.new(value, item_type) if value.bytesize != 1
184
- value = value.to_s.force_encoding(Encoding::ASCII_8BIT)
185
- value.unpack('c').first.to_s
186
- }
295
+ BYTE = ByteConversion.new
187
296
  DATE = ->(value, item_type) {
188
297
  if value.respond_to?(:strftime)
189
298
  value.strftime('%F')
@@ -202,21 +311,7 @@ module Saxon
202
311
  LexicalStringConversion.validate(value, item_type, Patterns::TIME)
203
312
  }
204
313
  DATE_TIME_STAMP = DATE_TIME
205
- DAY_TIME_DURATION = ->(value, item_type) {
206
- case value
207
- when Integer
208
- sign = value.negative? ? '-' : ''
209
- "#{sign}PT#{value.abs}S"
210
- when BigDecimal
211
- sign = value.negative? ? '-' : ''
212
- "#{sign}PT#{value.abs.to_s('F')}S"
213
- when Numeric
214
- sign = value.negative? ? '-' : ''
215
- sprintf("%sPT%0.9fS", sign, value.abs)
216
- else
217
- LexicalStringConversion.validate(value, item_type, Patterns::DAY_TIME_DURATION)
218
- end
219
- }
314
+ DAY_TIME_DURATION = DurationConversion.new(Patterns::DAY_TIME_DURATION)
220
315
  DECIMAL = ->(value, item_type) {
221
316
  case value
222
317
  when ::Integer
@@ -230,21 +325,7 @@ module Saxon
230
325
  end
231
326
  }
232
327
  DOUBLE = FloatConversion.new(:single)
233
- DURATION = ->(value, item_type) {
234
- case value
235
- when Integer
236
- sign = value.negative? ? '-' : ''
237
- "#{sign}PT#{value.abs}S"
238
- when BigDecimal
239
- sign = value.negative? ? '-' : ''
240
- "#{sign}PT#{value.abs.to_s('F')}S"
241
- when Numeric
242
- sign = value.negative? ? '-' : ''
243
- sprintf("%sPT%0.9fS", sign, value.abs)
244
- else
245
- LexicalStringConversion.validate(value, item_type, Patterns::DURATION)
246
- end
247
- }
328
+ DURATION = DurationConversion.new(Patterns::DURATION)
248
329
  FLOAT = FloatConversion.new
249
330
  G_DAY = GDateConversion.new({
250
331
  bounds: 1..31,
@@ -325,11 +406,7 @@ module Saxon
325
406
  TOKEN = ->(value, item_type) {
326
407
  LexicalStringConversion.validate(value, item_type, Patterns::TOKEN)
327
408
  }
328
- UNSIGNED_BYTE = ->(value, item_type) {
329
- raise Errors::RubyValueOutOfBounds.new(value, item_type) if value.bytesize != 1
330
- value = value.to_s.force_encoding(Encoding::ASCII_8BIT)
331
- value.unpack('C').first.to_s
332
- }
409
+ UNSIGNED_BYTE = ByteConversion.new(:unsigned)
333
410
  UNSIGNED_INT = IntegerConversion.new(0, 4294967295)
334
411
  UNSIGNED_LONG = IntegerConversion.new(0, 18446744073709551615)
335
412
  UNSIGNED_SHORT = IntegerConversion.new(0, 65535)
@@ -341,6 +418,7 @@ module Saxon
341
418
  }
342
419
  end
343
420
 
421
+ # Conversion process error classes
344
422
  module Errors
345
423
  # Raised during conversion from Ruby value to XDM Type lexical string
346
424
  # when the ruby value does not conform to the Type's string