regexp_parser 2.6.0 → 2.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -5
  3. data/LICENSE +1 -1
  4. data/lib/regexp_parser/expression/base.rb +0 -7
  5. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  6. data/lib/regexp_parser/expression/classes/backreference.rb +17 -3
  7. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  8. data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
  9. data/lib/regexp_parser/expression/classes/conditional.rb +2 -6
  10. data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
  11. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  13. data/lib/regexp_parser/expression/classes/keep.rb +1 -1
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/match_length.rb +8 -4
  18. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  19. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  20. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  21. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  22. data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
  23. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  24. data/lib/regexp_parser/expression/sequence.rb +5 -10
  25. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  26. data/lib/regexp_parser/expression/shared.rb +37 -20
  27. data/lib/regexp_parser/expression/subexpression.rb +20 -15
  28. data/lib/regexp_parser/expression.rb +34 -31
  29. data/lib/regexp_parser/lexer.rb +76 -36
  30. data/lib/regexp_parser/parser.rb +101 -100
  31. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  32. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  33. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  34. data/lib/regexp_parser/scanner/properties/long.csv +29 -0
  35. data/lib/regexp_parser/scanner/properties/short.csv +3 -0
  36. data/lib/regexp_parser/scanner/property.rl +2 -2
  37. data/lib/regexp_parser/scanner/scanner.rl +101 -172
  38. data/lib/regexp_parser/scanner.rb +1132 -1283
  39. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  40. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  41. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  42. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  43. data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
  44. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  45. data/lib/regexp_parser/syntax/token.rb +13 -13
  46. data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
  47. data/lib/regexp_parser/syntax/versions.rb +3 -1
  48. data/lib/regexp_parser/syntax.rb +1 -1
  49. data/lib/regexp_parser/version.rb +1 -1
  50. data/lib/regexp_parser.rb +6 -6
  51. data/regexp_parser.gemspec +5 -5
  52. metadata +14 -8
  53. data/CHANGELOG.md +0 -601
  54. data/README.md +0 -503
@@ -1,5 +1,5 @@
1
- require 'regexp_parser/error'
2
- require 'regexp_parser/expression'
1
+ require_relative 'error'
2
+ require_relative 'expression'
3
3
 
4
4
  class Regexp::Parser
5
5
  include Regexp::Expression
@@ -18,11 +18,11 @@ class Regexp::Parser
18
18
  end
19
19
  end
20
20
 
21
- def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
21
+ def self.parse(input, syntax = nil, options: nil, &block)
22
22
  new.parse(input, syntax, options: options, &block)
23
23
  end
24
24
 
25
- def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
25
+ def parse(input, syntax = nil, options: nil, &block)
26
26
  root = Root.construct(options: extract_options(input, options))
27
27
 
28
28
  self.root = root
@@ -35,7 +35,7 @@ class Regexp::Parser
35
35
 
36
36
  self.captured_group_counts = Hash.new(0)
37
37
 
38
- Regexp::Lexer.scan(input, syntax, options: options) do |token|
38
+ Regexp::Lexer.scan(input, syntax, options: options, collect_tokens: false) do |token|
39
39
  parse_token(token)
40
40
  end
41
41
 
@@ -232,7 +232,7 @@ class Regexp::Parser
232
232
  node << Backreference::NameRecursionLevel.new(token, active_opts)
233
233
  when :name_call
234
234
  node << Backreference::NameCall.new(token, active_opts)
235
- when :number, :number_ref
235
+ when :number, :number_ref # TODO: split in v3.0.0
236
236
  node << Backreference::Number.new(token, active_opts)
237
237
  when :number_recursion_ref
238
238
  node << Backreference::NumberRecursionLevel.new(token, active_opts).tap do |exp|
@@ -272,9 +272,9 @@ class Regexp::Parser
272
272
  nest_conditional(Conditional::Expression.new(token, active_opts))
273
273
  when :condition
274
274
  conditional_nesting.last.condition = Conditional::Condition.new(token, active_opts)
275
- conditional_nesting.last.add_sequence(active_opts)
275
+ conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
276
276
  when :separator
277
- conditional_nesting.last.add_sequence(active_opts)
277
+ conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
278
278
  self.node = conditional_nesting.last.branches.last
279
279
  when :close
280
280
  conditional_nesting.pop
@@ -322,6 +322,7 @@ class Regexp::Parser
322
322
 
323
323
  when :control
324
324
  if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
325
+ # TODO: emit :meta_control_sequence token in v3.0.0
325
326
  node << EscapeSequence::MetaControl.new(token, active_opts)
326
327
  else
327
328
  node << EscapeSequence::Control.new(token, active_opts)
@@ -329,6 +330,7 @@ class Regexp::Parser
329
330
 
330
331
  when :meta_sequence
331
332
  if token.text =~ /\A\\M-\\[Cc]/
333
+ # TODO: emit :meta_control_sequence token in v3.0.0
332
334
  node << EscapeSequence::MetaControl.new(token, active_opts)
333
335
  else
334
336
  node << EscapeSequence::Meta.new(token, active_opts)
@@ -349,11 +351,7 @@ class Regexp::Parser
349
351
  when :comment
350
352
  node << Comment.new(token, active_opts)
351
353
  when :whitespace
352
- if node.last.is_a?(WhiteSpace)
353
- node.last.merge(WhiteSpace.new(token, active_opts))
354
- else
355
- node << WhiteSpace.new(token, active_opts)
356
- end
354
+ node << WhiteSpace.new(token, active_opts)
357
355
  else
358
356
  raise UnknownTokenError.new('FreeSpace', token)
359
357
  end
@@ -379,98 +377,99 @@ class Regexp::Parser
379
377
  end
380
378
 
381
379
  def sequence_operation(klass, token)
382
- unless node.is_a?(klass)
380
+ unless node.instance_of?(klass)
383
381
  operator = klass.new(token, active_opts)
384
- sequence = operator.add_sequence(active_opts)
382
+ sequence = operator.add_sequence(active_opts, { ts: token.ts })
385
383
  sequence.expressions = node.expressions
386
384
  node.expressions = []
387
385
  nest(operator)
388
386
  end
389
- node.add_sequence(active_opts)
387
+ node.add_sequence(active_opts, { ts: token.te })
390
388
  end
391
389
 
392
390
  def posixclass(token)
393
391
  node << PosixClass.new(token, active_opts)
394
392
  end
395
393
 
396
- include Regexp::Expression::UnicodeProperty
397
- UPTokens = Regexp::Syntax::Token::UnicodeProperty
394
+ UP = Regexp::Expression::Property
395
+ UPTokens = Regexp::Syntax::Token::Property
398
396
 
399
397
  def property(token)
400
398
  case token.token
401
- when :alnum; node << Alnum.new(token, active_opts)
402
- when :alpha; node << Alpha.new(token, active_opts)
403
- when :ascii; node << Ascii.new(token, active_opts)
404
- when :blank; node << Blank.new(token, active_opts)
405
- when :cntrl; node << Cntrl.new(token, active_opts)
406
- when :digit; node << Digit.new(token, active_opts)
407
- when :graph; node << Graph.new(token, active_opts)
408
- when :lower; node << Lower.new(token, active_opts)
409
- when :print; node << Print.new(token, active_opts)
410
- when :punct; node << Punct.new(token, active_opts)
411
- when :space; node << Space.new(token, active_opts)
412
- when :upper; node << Upper.new(token, active_opts)
413
- when :word; node << Word.new(token, active_opts)
414
- when :xdigit; node << Xdigit.new(token, active_opts)
415
- when :xposixpunct; node << XPosixPunct.new(token, active_opts)
399
+ when :alnum; node << UP::Alnum.new(token, active_opts)
400
+ when :alpha; node << UP::Alpha.new(token, active_opts)
401
+ when :ascii; node << UP::Ascii.new(token, active_opts)
402
+ when :blank; node << UP::Blank.new(token, active_opts)
403
+ when :cntrl; node << UP::Cntrl.new(token, active_opts)
404
+ when :digit; node << UP::Digit.new(token, active_opts)
405
+ when :graph; node << UP::Graph.new(token, active_opts)
406
+ when :lower; node << UP::Lower.new(token, active_opts)
407
+ when :print; node << UP::Print.new(token, active_opts)
408
+ when :punct; node << UP::Punct.new(token, active_opts)
409
+ when :space; node << UP::Space.new(token, active_opts)
410
+ when :upper; node << UP::Upper.new(token, active_opts)
411
+ when :word; node << UP::Word.new(token, active_opts)
412
+ when :xdigit; node << UP::Xdigit.new(token, active_opts)
413
+ when :xposixpunct; node << UP::XPosixPunct.new(token, active_opts)
416
414
 
417
415
  # only in Oniguruma (old rubies)
418
- when :newline; node << Newline.new(token, active_opts)
419
-
420
- when :any; node << Any.new(token, active_opts)
421
- when :assigned; node << Assigned.new(token, active_opts)
422
-
423
- when :letter; node << Letter::Any.new(token, active_opts)
424
- when :cased_letter; node << Letter::Cased.new(token, active_opts)
425
- when :uppercase_letter; node << Letter::Uppercase.new(token, active_opts)
426
- when :lowercase_letter; node << Letter::Lowercase.new(token, active_opts)
427
- when :titlecase_letter; node << Letter::Titlecase.new(token, active_opts)
428
- when :modifier_letter; node << Letter::Modifier.new(token, active_opts)
429
- when :other_letter; node << Letter::Other.new(token, active_opts)
430
-
431
- when :mark; node << Mark::Any.new(token, active_opts)
432
- when :combining_mark; node << Mark::Combining.new(token, active_opts)
433
- when :nonspacing_mark; node << Mark::Nonspacing.new(token, active_opts)
434
- when :spacing_mark; node << Mark::Spacing.new(token, active_opts)
435
- when :enclosing_mark; node << Mark::Enclosing.new(token, active_opts)
436
-
437
- when :number; node << Number::Any.new(token, active_opts)
438
- when :decimal_number; node << Number::Decimal.new(token, active_opts)
439
- when :letter_number; node << Number::Letter.new(token, active_opts)
440
- when :other_number; node << Number::Other.new(token, active_opts)
441
-
442
- when :punctuation; node << Punctuation::Any.new(token, active_opts)
443
- when :connector_punctuation; node << Punctuation::Connector.new(token, active_opts)
444
- when :dash_punctuation; node << Punctuation::Dash.new(token, active_opts)
445
- when :open_punctuation; node << Punctuation::Open.new(token, active_opts)
446
- when :close_punctuation; node << Punctuation::Close.new(token, active_opts)
447
- when :initial_punctuation; node << Punctuation::Initial.new(token, active_opts)
448
- when :final_punctuation; node << Punctuation::Final.new(token, active_opts)
449
- when :other_punctuation; node << Punctuation::Other.new(token, active_opts)
450
-
451
- when :separator; node << Separator::Any.new(token, active_opts)
452
- when :space_separator; node << Separator::Space.new(token, active_opts)
453
- when :line_separator; node << Separator::Line.new(token, active_opts)
454
- when :paragraph_separator; node << Separator::Paragraph.new(token, active_opts)
455
-
456
- when :symbol; node << Symbol::Any.new(token, active_opts)
457
- when :math_symbol; node << Symbol::Math.new(token, active_opts)
458
- when :currency_symbol; node << Symbol::Currency.new(token, active_opts)
459
- when :modifier_symbol; node << Symbol::Modifier.new(token, active_opts)
460
- when :other_symbol; node << Symbol::Other.new(token, active_opts)
461
-
462
- when :other; node << Codepoint::Any.new(token, active_opts)
463
- when :control; node << Codepoint::Control.new(token, active_opts)
464
- when :format; node << Codepoint::Format.new(token, active_opts)
465
- when :surrogate; node << Codepoint::Surrogate.new(token, active_opts)
466
- when :private_use; node << Codepoint::PrivateUse.new(token, active_opts)
467
- when :unassigned; node << Codepoint::Unassigned.new(token, active_opts)
468
-
469
- when *UPTokens::Age; node << Age.new(token, active_opts)
470
- when *UPTokens::Derived; node << Derived.new(token, active_opts)
471
- when *UPTokens::Emoji; node << Emoji.new(token, active_opts)
472
- when *UPTokens::Script; node << Script.new(token, active_opts)
473
- when *UPTokens::UnicodeBlock; node << Block.new(token, active_opts)
416
+ when :newline; node << UP::Newline.new(token, active_opts)
417
+
418
+ when :any; node << UP::Any.new(token, active_opts)
419
+ when :assigned; node << UP::Assigned.new(token, active_opts)
420
+
421
+ when :letter; node << UP::Letter::Any.new(token, active_opts)
422
+ when :cased_letter; node << UP::Letter::Cased.new(token, active_opts)
423
+ when :uppercase_letter; node << UP::Letter::Uppercase.new(token, active_opts)
424
+ when :lowercase_letter; node << UP::Letter::Lowercase.new(token, active_opts)
425
+ when :titlecase_letter; node << UP::Letter::Titlecase.new(token, active_opts)
426
+ when :modifier_letter; node << UP::Letter::Modifier.new(token, active_opts)
427
+ when :other_letter; node << UP::Letter::Other.new(token, active_opts)
428
+
429
+ when :mark; node << UP::Mark::Any.new(token, active_opts)
430
+ when :combining_mark; node << UP::Mark::Combining.new(token, active_opts)
431
+ when :nonspacing_mark; node << UP::Mark::Nonspacing.new(token, active_opts)
432
+ when :spacing_mark; node << UP::Mark::Spacing.new(token, active_opts)
433
+ when :enclosing_mark; node << UP::Mark::Enclosing.new(token, active_opts)
434
+
435
+ when :number; node << UP::Number::Any.new(token, active_opts)
436
+ when :decimal_number; node << UP::Number::Decimal.new(token, active_opts)
437
+ when :letter_number; node << UP::Number::Letter.new(token, active_opts)
438
+ when :other_number; node << UP::Number::Other.new(token, active_opts)
439
+
440
+ when :punctuation; node << UP::Punctuation::Any.new(token, active_opts)
441
+ when :connector_punctuation; node << UP::Punctuation::Connector.new(token, active_opts)
442
+ when :dash_punctuation; node << UP::Punctuation::Dash.new(token, active_opts)
443
+ when :open_punctuation; node << UP::Punctuation::Open.new(token, active_opts)
444
+ when :close_punctuation; node << UP::Punctuation::Close.new(token, active_opts)
445
+ when :initial_punctuation; node << UP::Punctuation::Initial.new(token, active_opts)
446
+ when :final_punctuation; node << UP::Punctuation::Final.new(token, active_opts)
447
+ when :other_punctuation; node << UP::Punctuation::Other.new(token, active_opts)
448
+
449
+ when :separator; node << UP::Separator::Any.new(token, active_opts)
450
+ when :space_separator; node << UP::Separator::Space.new(token, active_opts)
451
+ when :line_separator; node << UP::Separator::Line.new(token, active_opts)
452
+ when :paragraph_separator; node << UP::Separator::Paragraph.new(token, active_opts)
453
+
454
+ when :symbol; node << UP::Symbol::Any.new(token, active_opts)
455
+ when :math_symbol; node << UP::Symbol::Math.new(token, active_opts)
456
+ when :currency_symbol; node << UP::Symbol::Currency.new(token, active_opts)
457
+ when :modifier_symbol; node << UP::Symbol::Modifier.new(token, active_opts)
458
+ when :other_symbol; node << UP::Symbol::Other.new(token, active_opts)
459
+
460
+ when :other; node << UP::Codepoint::Any.new(token, active_opts)
461
+ when :control; node << UP::Codepoint::Control.new(token, active_opts)
462
+ when :format; node << UP::Codepoint::Format.new(token, active_opts)
463
+ when :surrogate; node << UP::Codepoint::Surrogate.new(token, active_opts)
464
+ when :private_use; node << UP::Codepoint::PrivateUse.new(token, active_opts)
465
+ when :unassigned; node << UP::Codepoint::Unassigned.new(token, active_opts)
466
+
467
+ when *UPTokens::Age; node << UP::Age.new(token, active_opts)
468
+ when *UPTokens::Derived; node << UP::Derived.new(token, active_opts)
469
+ when *UPTokens::Emoji; node << UP::Emoji.new(token, active_opts)
470
+ when *UPTokens::Enumerated; node << UP::Enumerated.new(token, active_opts)
471
+ when *UPTokens::Script; node << UP::Script.new(token, active_opts)
472
+ when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
474
473
 
475
474
  else
476
475
  raise UnknownTokenError.new('UnicodeProperty', token)
@@ -478,8 +477,7 @@ class Regexp::Parser
478
477
  end
479
478
 
480
479
  def quantifier(token)
481
- target_node = node.expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
482
- target_node or raise ParserError, "No valid target found for '#{token.text}'"
480
+ target_node = node.extract_quantifier_target(token.text)
483
481
 
484
482
  # in case of chained quantifiers, wrap target in an implicit passive group
485
483
  # description of the problem: https://github.com/ammar/regexp_parser/issues/3
@@ -527,6 +525,8 @@ class Regexp::Parser
527
525
  end
528
526
 
529
527
  def open_set(token)
528
+ # TODO: this and Quantifier are the only cases where Expression#token
529
+ # does not match the scanner/lexer output. Fix in v3.0.0.
530
530
  token.token = :character
531
531
  nest(CharacterSet.new(token, active_opts))
532
532
  end
@@ -541,7 +541,7 @@ class Regexp::Parser
541
541
 
542
542
  def range(token)
543
543
  exp = CharacterSet::Range.new(token, active_opts)
544
- scope = node.last.is_a?(CharacterSet::IntersectedSequence) ? node.last : node
544
+ scope = node.last.instance_of?(CharacterSet::IntersectedSequence) ? node.last : node
545
545
  exp << scope.expressions.pop
546
546
  nest(exp)
547
547
  end
@@ -568,28 +568,29 @@ class Regexp::Parser
568
568
  end
569
569
 
570
570
  def close_completed_character_set_range
571
- decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
571
+ decrease_nesting if node.instance_of?(CharacterSet::Range) && node.complete?
572
572
  end
573
573
 
574
574
  def active_opts
575
575
  options_stack.last
576
576
  end
577
577
 
578
- # Assigns referenced expressions to refering expressions, e.g. if there is
578
+ # Assigns referenced expressions to referring expressions, e.g. if there is
579
579
  # an instance of Backreference::Number, its #referenced_expression is set to
580
580
  # the instance of Group::Capture that it refers to via its number.
581
581
  def assign_referenced_expressions
582
- # find all referencable expressions
582
+ # find all referenceable and referring expressions
583
583
  targets = { 0 => root }
584
+ referrers = []
584
585
  root.each_expression do |exp|
585
586
  exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
587
+ referrers << exp if exp.referential?
586
588
  end
587
- # assign them to any refering expressions
588
- root.each_expression do |exp|
589
- next unless exp.respond_to?(:reference)
590
-
589
+ # assign referenced expressions to referring expressions
590
+ # (in a second iteration because there might be forward references)
591
+ referrers.each do |exp|
591
592
  exp.referenced_expression = targets[exp.reference] ||
592
- raise(ParserError, "Invalid reference: #{exp.reference}")
593
+ raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
593
594
  end
594
595
  end
595
596
  end # module Regexp::Parser
@@ -0,0 +1,8 @@
1
+ class Regexp::Scanner
2
+ # Unexpected end of pattern
3
+ class PrematureEndError < ScannerError
4
+ def initialize(where = '')
5
+ super "Premature end of pattern at #{where}"
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,6 @@
1
+ require_relative '../../../regexp_parser/error'
2
+
3
+ class Regexp::Scanner
4
+ # General scanner error (catch all)
5
+ class ScannerError < Regexp::Parser::Error; end
6
+ end
@@ -0,0 +1,63 @@
1
+ class Regexp::Scanner
2
+ # Base for all scanner validation errors
3
+ class ValidationError < ScannerError
4
+ # Centralizes and unifies the handling of validation related errors.
5
+ def self.for(type, problem, reason = nil)
6
+ types.fetch(type).new(problem, reason)
7
+ end
8
+
9
+ def self.types
10
+ @types ||= {
11
+ backref: InvalidBackrefError,
12
+ group: InvalidGroupError,
13
+ group_option: InvalidGroupOption,
14
+ posix_class: UnknownPosixClassError,
15
+ property: UnknownUnicodePropertyError,
16
+ sequence: InvalidSequenceError,
17
+ }
18
+ end
19
+ end
20
+
21
+ # Invalid sequence format. Used for escape sequences, mainly.
22
+ class InvalidSequenceError < ValidationError
23
+ def initialize(what = 'sequence', where = '')
24
+ super "Invalid #{what} at #{where}"
25
+ end
26
+ end
27
+
28
+ # Invalid group. Used for named groups.
29
+ class InvalidGroupError < ValidationError
30
+ def initialize(what, reason)
31
+ super "Invalid #{what}, #{reason}."
32
+ end
33
+ end
34
+
35
+ # Invalid group option. Used for inline options.
36
+ # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
37
+ class InvalidGroupOption < ValidationError
38
+ def initialize(option, text)
39
+ super "Invalid group option #{option} in #{text}"
40
+ end
41
+ end
42
+
43
+ # Invalid back reference. Used for name and number refs/calls.
44
+ class InvalidBackrefError < ValidationError
45
+ def initialize(what, reason)
46
+ super "Invalid back reference #{what}, #{reason}"
47
+ end
48
+ end
49
+
50
+ # The property name was not recognized by the scanner.
51
+ class UnknownUnicodePropertyError < ValidationError
52
+ def initialize(name, _)
53
+ super "Unknown unicode character property name #{name}"
54
+ end
55
+ end
56
+
57
+ # The POSIX class name was not recognized by the scanner.
58
+ class UnknownPosixClassError < ValidationError
59
+ def initialize(text, _)
60
+ super "Unknown POSIX class #{text}"
61
+ end
62
+ end
63
+ end
@@ -7,6 +7,8 @@ age=12.0,age=12.0
7
7
  age=12.1,age=12.1
8
8
  age=13.0,age=13.0
9
9
  age=14.0,age=14.0
10
+ age=15.0,age=15.0
11
+ age=15.1,age=15.1
10
12
  age=2.0,age=2.0
11
13
  age=2.1,age=2.1
12
14
  age=3.0,age=3.0
@@ -97,6 +99,7 @@ emojimodifierbase,emoji_modifier_base
97
99
  emojipresentation,emoji_presentation
98
100
  enclosingmark,enclosing_mark
99
101
  ethiopic,ethiopic
102
+ extendedpictographic,extended_pictographic
100
103
  extender,extender
101
104
  finalpunctuation,final_punctuation
102
105
  format,format
@@ -106,6 +109,19 @@ gothic,gothic
106
109
  grantha,grantha
107
110
  graph,graph
108
111
  graphemebase,grapheme_base
112
+ graphemeclusterbreak=control,grapheme_cluster_break=control
113
+ graphemeclusterbreak=cr,grapheme_cluster_break=cr
114
+ graphemeclusterbreak=extend,grapheme_cluster_break=extend
115
+ graphemeclusterbreak=l,grapheme_cluster_break=l
116
+ graphemeclusterbreak=lf,grapheme_cluster_break=lf
117
+ graphemeclusterbreak=lv,grapheme_cluster_break=lv
118
+ graphemeclusterbreak=lvt,grapheme_cluster_break=lvt
119
+ graphemeclusterbreak=prepend,grapheme_cluster_break=prepend
120
+ graphemeclusterbreak=regionalindicator,grapheme_cluster_break=regional_indicator
121
+ graphemeclusterbreak=spacingmark,grapheme_cluster_break=spacingmark
122
+ graphemeclusterbreak=t,grapheme_cluster_break=t
123
+ graphemeclusterbreak=v,grapheme_cluster_break=v
124
+ graphemeclusterbreak=zwj,grapheme_cluster_break=zwj
109
125
  graphemeextend,grapheme_extend
110
126
  graphemelink,grapheme_link
111
127
  greek,greek
@@ -121,11 +137,14 @@ hebrew,hebrew
121
137
  hexdigit,hex_digit
122
138
  hiragana,hiragana
123
139
  hyphen,hyphen
140
+ idcompatmathcontinue,id_compat_math_continue
141
+ idcompatmathstart,id_compat_math_start
124
142
  idcontinue,id_continue
125
143
  ideographic,ideographic
126
144
  idsbinaryoperator,ids_binary_operator
127
145
  idstart,id_start
128
146
  idstrinaryoperator,ids_trinary_operator
147
+ idsunaryoperator,ids_unary_operator
129
148
  imperialaramaic,imperial_aramaic
130
149
  inadlam,in_adlam
131
150
  inaegeannumbers,in_aegean_numbers
@@ -139,6 +158,7 @@ inancientsymbols,in_ancient_symbols
139
158
  inarabic,in_arabic
140
159
  inarabicextendeda,in_arabic_extended_a
141
160
  inarabicextendedb,in_arabic_extended_b
161
+ inarabicextendedc,in_arabic_extended_c
142
162
  inarabicmathematicalalphabeticsymbols,in_arabic_mathematical_alphabetic_symbols
143
163
  inarabicpresentationformsa,in_arabic_presentation_forms_a
144
164
  inarabicpresentationformsb,in_arabic_presentation_forms_b
@@ -186,6 +206,8 @@ incjkunifiedideographsextensiond,in_cjk_unified_ideographs_extension_d
186
206
  incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
187
207
  incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
188
208
  incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
209
+ incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
210
+ incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
189
211
  incombiningdiacriticalmarks,in_combining_diacritical_marks
190
212
  incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
191
213
  incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
@@ -205,10 +227,12 @@ incyrillic,in_cyrillic
205
227
  incyrillicextendeda,in_cyrillic_extended_a
206
228
  incyrillicextendedb,in_cyrillic_extended_b
207
229
  incyrillicextendedc,in_cyrillic_extended_c
230
+ incyrillicextendedd,in_cyrillic_extended_d
208
231
  incyrillicsupplement,in_cyrillic_supplement
209
232
  indeseret,in_deseret
210
233
  indevanagari,in_devanagari
211
234
  indevanagariextended,in_devanagari_extended
235
+ indevanagariextendeda,in_devanagari_extended_a
212
236
  indingbats,in_dingbats
213
237
  indivesakuru,in_dives_akuru
214
238
  indogra,in_dogra
@@ -268,6 +292,7 @@ inipaextensions,in_ipa_extensions
268
292
  initialpunctuation,initial_punctuation
269
293
  injavanese,in_javanese
270
294
  inkaithi,in_kaithi
295
+ inkaktoviknumerals,in_kaktovik_numerals
271
296
  inkanaextendeda,in_kana_extended_a
272
297
  inkanaextendedb,in_kana_extended_b
273
298
  inkanasupplement,in_kana_supplement
@@ -276,6 +301,7 @@ inkangxiradicals,in_kangxi_radicals
276
301
  inkannada,in_kannada
277
302
  inkatakana,in_katakana
278
303
  inkatakanaphoneticextensions,in_katakana_phonetic_extensions
304
+ inkawi,in_kawi
279
305
  inkayahli,in_kayah_li
280
306
  inkharoshthi,in_kharoshthi
281
307
  inkhitansmallscript,in_khitan_small_script
@@ -339,6 +365,7 @@ inmyanmar,in_myanmar
339
365
  inmyanmarextendeda,in_myanmar_extended_a
340
366
  inmyanmarextendedb,in_myanmar_extended_b
341
367
  innabataean,in_nabataean
368
+ innagmundari,in_nag_mundari
342
369
  innandinagari,in_nandinagari
343
370
  innewa,in_newa
344
371
  innewtailue,in_new_tai_lue
@@ -457,6 +484,7 @@ joincontrol,join_control
457
484
  kaithi,kaithi
458
485
  kannada,kannada
459
486
  katakana,katakana
487
+ kawi,kawi
460
488
  kayahli,kayah_li
461
489
  kharoshthi,kharoshthi
462
490
  khitansmallscript,khitan_small_script
@@ -503,6 +531,7 @@ mro,mro
503
531
  multani,multani
504
532
  myanmar,myanmar
505
533
  nabataean,nabataean
534
+ nagmundari,nag_mundari
506
535
  nandinagari,nandinagari
507
536
  newa,newa
508
537
  newline,newline
@@ -57,6 +57,7 @@ emod,emoji_modifier
57
57
  epres,emoji_presentation
58
58
  ethi,ethiopic
59
59
  ext,extender
60
+ extpict,extended_pictographic
60
61
  geor,georgian
61
62
  glag,glagolitic
62
63
  gong,gunjala_gondi
@@ -85,6 +86,7 @@ ideo,ideographic
85
86
  ids,id_start
86
87
  idsb,ids_binary_operator
87
88
  idst,ids_trinary_operator
89
+ idsu,ids_unary_operator
88
90
  ital,old_italic
89
91
  java,javanese
90
92
  joinc,join_control
@@ -133,6 +135,7 @@ mtei,meetei_mayek
133
135
  mult,multani
134
136
  mymr,myanmar
135
137
  n,number
138
+ nagm,nag_mundari
136
139
  nand,nandinagari
137
140
  narb,old_north_arabian
138
141
  nbat,nabataean
@@ -17,10 +17,10 @@
17
17
  text = copy(data, ts-1, te)
18
18
  type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
19
19
 
20
- name = data[ts+2..te-2].pack('c*').gsub(/[\^\s_\-]/, '').downcase
20
+ name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase
21
21
 
22
22
  token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
23
- validation_error(:property, name) unless token
23
+ raise ValidationError.for(:property, name) unless token
24
24
 
25
25
  self.emit(type, token.to_sym, text)
26
26