regexp_parser 2.6.0 → 2.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -5
  3. data/LICENSE +1 -1
  4. data/lib/regexp_parser/expression/base.rb +0 -7
  5. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  6. data/lib/regexp_parser/expression/classes/backreference.rb +17 -3
  7. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  8. data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
  9. data/lib/regexp_parser/expression/classes/conditional.rb +2 -6
  10. data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
  11. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  13. data/lib/regexp_parser/expression/classes/keep.rb +1 -1
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/match_length.rb +8 -4
  18. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  19. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  20. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  21. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  22. data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
  23. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  24. data/lib/regexp_parser/expression/sequence.rb +5 -10
  25. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  26. data/lib/regexp_parser/expression/shared.rb +37 -20
  27. data/lib/regexp_parser/expression/subexpression.rb +20 -15
  28. data/lib/regexp_parser/expression.rb +34 -31
  29. data/lib/regexp_parser/lexer.rb +76 -36
  30. data/lib/regexp_parser/parser.rb +101 -100
  31. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  32. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  33. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  34. data/lib/regexp_parser/scanner/properties/long.csv +29 -0
  35. data/lib/regexp_parser/scanner/properties/short.csv +3 -0
  36. data/lib/regexp_parser/scanner/property.rl +2 -2
  37. data/lib/regexp_parser/scanner/scanner.rl +101 -172
  38. data/lib/regexp_parser/scanner.rb +1132 -1283
  39. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  40. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  41. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  42. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  43. data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
  44. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  45. data/lib/regexp_parser/syntax/token.rb +13 -13
  46. data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
  47. data/lib/regexp_parser/syntax/versions.rb +3 -1
  48. data/lib/regexp_parser/syntax.rb +1 -1
  49. data/lib/regexp_parser/version.rb +1 -1
  50. data/lib/regexp_parser.rb +6 -6
  51. data/regexp_parser.gemspec +5 -5
  52. metadata +14 -8
  53. data/CHANGELOG.md +0 -601
  54. data/README.md +0 -503
@@ -1,5 +1,5 @@
1
- require 'regexp_parser/error'
2
- require 'regexp_parser/expression'
1
+ require_relative 'error'
2
+ require_relative 'expression'
3
3
 
4
4
  class Regexp::Parser
5
5
  include Regexp::Expression
@@ -18,11 +18,11 @@ class Regexp::Parser
18
18
  end
19
19
  end
20
20
 
21
- def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
21
+ def self.parse(input, syntax = nil, options: nil, &block)
22
22
  new.parse(input, syntax, options: options, &block)
23
23
  end
24
24
 
25
- def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
25
+ def parse(input, syntax = nil, options: nil, &block)
26
26
  root = Root.construct(options: extract_options(input, options))
27
27
 
28
28
  self.root = root
@@ -35,7 +35,7 @@ class Regexp::Parser
35
35
 
36
36
  self.captured_group_counts = Hash.new(0)
37
37
 
38
- Regexp::Lexer.scan(input, syntax, options: options) do |token|
38
+ Regexp::Lexer.scan(input, syntax, options: options, collect_tokens: false) do |token|
39
39
  parse_token(token)
40
40
  end
41
41
 
@@ -232,7 +232,7 @@ class Regexp::Parser
232
232
  node << Backreference::NameRecursionLevel.new(token, active_opts)
233
233
  when :name_call
234
234
  node << Backreference::NameCall.new(token, active_opts)
235
- when :number, :number_ref
235
+ when :number, :number_ref # TODO: split in v3.0.0
236
236
  node << Backreference::Number.new(token, active_opts)
237
237
  when :number_recursion_ref
238
238
  node << Backreference::NumberRecursionLevel.new(token, active_opts).tap do |exp|
@@ -272,9 +272,9 @@ class Regexp::Parser
272
272
  nest_conditional(Conditional::Expression.new(token, active_opts))
273
273
  when :condition
274
274
  conditional_nesting.last.condition = Conditional::Condition.new(token, active_opts)
275
- conditional_nesting.last.add_sequence(active_opts)
275
+ conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
276
276
  when :separator
277
- conditional_nesting.last.add_sequence(active_opts)
277
+ conditional_nesting.last.add_sequence(active_opts, { ts: token.te })
278
278
  self.node = conditional_nesting.last.branches.last
279
279
  when :close
280
280
  conditional_nesting.pop
@@ -322,6 +322,7 @@ class Regexp::Parser
322
322
 
323
323
  when :control
324
324
  if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
325
+ # TODO: emit :meta_control_sequence token in v3.0.0
325
326
  node << EscapeSequence::MetaControl.new(token, active_opts)
326
327
  else
327
328
  node << EscapeSequence::Control.new(token, active_opts)
@@ -329,6 +330,7 @@ class Regexp::Parser
329
330
 
330
331
  when :meta_sequence
331
332
  if token.text =~ /\A\\M-\\[Cc]/
333
+ # TODO: emit :meta_control_sequence token in v3.0.0
332
334
  node << EscapeSequence::MetaControl.new(token, active_opts)
333
335
  else
334
336
  node << EscapeSequence::Meta.new(token, active_opts)
@@ -349,11 +351,7 @@ class Regexp::Parser
349
351
  when :comment
350
352
  node << Comment.new(token, active_opts)
351
353
  when :whitespace
352
- if node.last.is_a?(WhiteSpace)
353
- node.last.merge(WhiteSpace.new(token, active_opts))
354
- else
355
- node << WhiteSpace.new(token, active_opts)
356
- end
354
+ node << WhiteSpace.new(token, active_opts)
357
355
  else
358
356
  raise UnknownTokenError.new('FreeSpace', token)
359
357
  end
@@ -379,98 +377,99 @@ class Regexp::Parser
379
377
  end
380
378
 
381
379
  def sequence_operation(klass, token)
382
- unless node.is_a?(klass)
380
+ unless node.instance_of?(klass)
383
381
  operator = klass.new(token, active_opts)
384
- sequence = operator.add_sequence(active_opts)
382
+ sequence = operator.add_sequence(active_opts, { ts: token.ts })
385
383
  sequence.expressions = node.expressions
386
384
  node.expressions = []
387
385
  nest(operator)
388
386
  end
389
- node.add_sequence(active_opts)
387
+ node.add_sequence(active_opts, { ts: token.te })
390
388
  end
391
389
 
392
390
  def posixclass(token)
393
391
  node << PosixClass.new(token, active_opts)
394
392
  end
395
393
 
396
- include Regexp::Expression::UnicodeProperty
397
- UPTokens = Regexp::Syntax::Token::UnicodeProperty
394
+ UP = Regexp::Expression::Property
395
+ UPTokens = Regexp::Syntax::Token::Property
398
396
 
399
397
  def property(token)
400
398
  case token.token
401
- when :alnum; node << Alnum.new(token, active_opts)
402
- when :alpha; node << Alpha.new(token, active_opts)
403
- when :ascii; node << Ascii.new(token, active_opts)
404
- when :blank; node << Blank.new(token, active_opts)
405
- when :cntrl; node << Cntrl.new(token, active_opts)
406
- when :digit; node << Digit.new(token, active_opts)
407
- when :graph; node << Graph.new(token, active_opts)
408
- when :lower; node << Lower.new(token, active_opts)
409
- when :print; node << Print.new(token, active_opts)
410
- when :punct; node << Punct.new(token, active_opts)
411
- when :space; node << Space.new(token, active_opts)
412
- when :upper; node << Upper.new(token, active_opts)
413
- when :word; node << Word.new(token, active_opts)
414
- when :xdigit; node << Xdigit.new(token, active_opts)
415
- when :xposixpunct; node << XPosixPunct.new(token, active_opts)
399
+ when :alnum; node << UP::Alnum.new(token, active_opts)
400
+ when :alpha; node << UP::Alpha.new(token, active_opts)
401
+ when :ascii; node << UP::Ascii.new(token, active_opts)
402
+ when :blank; node << UP::Blank.new(token, active_opts)
403
+ when :cntrl; node << UP::Cntrl.new(token, active_opts)
404
+ when :digit; node << UP::Digit.new(token, active_opts)
405
+ when :graph; node << UP::Graph.new(token, active_opts)
406
+ when :lower; node << UP::Lower.new(token, active_opts)
407
+ when :print; node << UP::Print.new(token, active_opts)
408
+ when :punct; node << UP::Punct.new(token, active_opts)
409
+ when :space; node << UP::Space.new(token, active_opts)
410
+ when :upper; node << UP::Upper.new(token, active_opts)
411
+ when :word; node << UP::Word.new(token, active_opts)
412
+ when :xdigit; node << UP::Xdigit.new(token, active_opts)
413
+ when :xposixpunct; node << UP::XPosixPunct.new(token, active_opts)
416
414
 
417
415
  # only in Oniguruma (old rubies)
418
- when :newline; node << Newline.new(token, active_opts)
419
-
420
- when :any; node << Any.new(token, active_opts)
421
- when :assigned; node << Assigned.new(token, active_opts)
422
-
423
- when :letter; node << Letter::Any.new(token, active_opts)
424
- when :cased_letter; node << Letter::Cased.new(token, active_opts)
425
- when :uppercase_letter; node << Letter::Uppercase.new(token, active_opts)
426
- when :lowercase_letter; node << Letter::Lowercase.new(token, active_opts)
427
- when :titlecase_letter; node << Letter::Titlecase.new(token, active_opts)
428
- when :modifier_letter; node << Letter::Modifier.new(token, active_opts)
429
- when :other_letter; node << Letter::Other.new(token, active_opts)
430
-
431
- when :mark; node << Mark::Any.new(token, active_opts)
432
- when :combining_mark; node << Mark::Combining.new(token, active_opts)
433
- when :nonspacing_mark; node << Mark::Nonspacing.new(token, active_opts)
434
- when :spacing_mark; node << Mark::Spacing.new(token, active_opts)
435
- when :enclosing_mark; node << Mark::Enclosing.new(token, active_opts)
436
-
437
- when :number; node << Number::Any.new(token, active_opts)
438
- when :decimal_number; node << Number::Decimal.new(token, active_opts)
439
- when :letter_number; node << Number::Letter.new(token, active_opts)
440
- when :other_number; node << Number::Other.new(token, active_opts)
441
-
442
- when :punctuation; node << Punctuation::Any.new(token, active_opts)
443
- when :connector_punctuation; node << Punctuation::Connector.new(token, active_opts)
444
- when :dash_punctuation; node << Punctuation::Dash.new(token, active_opts)
445
- when :open_punctuation; node << Punctuation::Open.new(token, active_opts)
446
- when :close_punctuation; node << Punctuation::Close.new(token, active_opts)
447
- when :initial_punctuation; node << Punctuation::Initial.new(token, active_opts)
448
- when :final_punctuation; node << Punctuation::Final.new(token, active_opts)
449
- when :other_punctuation; node << Punctuation::Other.new(token, active_opts)
450
-
451
- when :separator; node << Separator::Any.new(token, active_opts)
452
- when :space_separator; node << Separator::Space.new(token, active_opts)
453
- when :line_separator; node << Separator::Line.new(token, active_opts)
454
- when :paragraph_separator; node << Separator::Paragraph.new(token, active_opts)
455
-
456
- when :symbol; node << Symbol::Any.new(token, active_opts)
457
- when :math_symbol; node << Symbol::Math.new(token, active_opts)
458
- when :currency_symbol; node << Symbol::Currency.new(token, active_opts)
459
- when :modifier_symbol; node << Symbol::Modifier.new(token, active_opts)
460
- when :other_symbol; node << Symbol::Other.new(token, active_opts)
461
-
462
- when :other; node << Codepoint::Any.new(token, active_opts)
463
- when :control; node << Codepoint::Control.new(token, active_opts)
464
- when :format; node << Codepoint::Format.new(token, active_opts)
465
- when :surrogate; node << Codepoint::Surrogate.new(token, active_opts)
466
- when :private_use; node << Codepoint::PrivateUse.new(token, active_opts)
467
- when :unassigned; node << Codepoint::Unassigned.new(token, active_opts)
468
-
469
- when *UPTokens::Age; node << Age.new(token, active_opts)
470
- when *UPTokens::Derived; node << Derived.new(token, active_opts)
471
- when *UPTokens::Emoji; node << Emoji.new(token, active_opts)
472
- when *UPTokens::Script; node << Script.new(token, active_opts)
473
- when *UPTokens::UnicodeBlock; node << Block.new(token, active_opts)
416
+ when :newline; node << UP::Newline.new(token, active_opts)
417
+
418
+ when :any; node << UP::Any.new(token, active_opts)
419
+ when :assigned; node << UP::Assigned.new(token, active_opts)
420
+
421
+ when :letter; node << UP::Letter::Any.new(token, active_opts)
422
+ when :cased_letter; node << UP::Letter::Cased.new(token, active_opts)
423
+ when :uppercase_letter; node << UP::Letter::Uppercase.new(token, active_opts)
424
+ when :lowercase_letter; node << UP::Letter::Lowercase.new(token, active_opts)
425
+ when :titlecase_letter; node << UP::Letter::Titlecase.new(token, active_opts)
426
+ when :modifier_letter; node << UP::Letter::Modifier.new(token, active_opts)
427
+ when :other_letter; node << UP::Letter::Other.new(token, active_opts)
428
+
429
+ when :mark; node << UP::Mark::Any.new(token, active_opts)
430
+ when :combining_mark; node << UP::Mark::Combining.new(token, active_opts)
431
+ when :nonspacing_mark; node << UP::Mark::Nonspacing.new(token, active_opts)
432
+ when :spacing_mark; node << UP::Mark::Spacing.new(token, active_opts)
433
+ when :enclosing_mark; node << UP::Mark::Enclosing.new(token, active_opts)
434
+
435
+ when :number; node << UP::Number::Any.new(token, active_opts)
436
+ when :decimal_number; node << UP::Number::Decimal.new(token, active_opts)
437
+ when :letter_number; node << UP::Number::Letter.new(token, active_opts)
438
+ when :other_number; node << UP::Number::Other.new(token, active_opts)
439
+
440
+ when :punctuation; node << UP::Punctuation::Any.new(token, active_opts)
441
+ when :connector_punctuation; node << UP::Punctuation::Connector.new(token, active_opts)
442
+ when :dash_punctuation; node << UP::Punctuation::Dash.new(token, active_opts)
443
+ when :open_punctuation; node << UP::Punctuation::Open.new(token, active_opts)
444
+ when :close_punctuation; node << UP::Punctuation::Close.new(token, active_opts)
445
+ when :initial_punctuation; node << UP::Punctuation::Initial.new(token, active_opts)
446
+ when :final_punctuation; node << UP::Punctuation::Final.new(token, active_opts)
447
+ when :other_punctuation; node << UP::Punctuation::Other.new(token, active_opts)
448
+
449
+ when :separator; node << UP::Separator::Any.new(token, active_opts)
450
+ when :space_separator; node << UP::Separator::Space.new(token, active_opts)
451
+ when :line_separator; node << UP::Separator::Line.new(token, active_opts)
452
+ when :paragraph_separator; node << UP::Separator::Paragraph.new(token, active_opts)
453
+
454
+ when :symbol; node << UP::Symbol::Any.new(token, active_opts)
455
+ when :math_symbol; node << UP::Symbol::Math.new(token, active_opts)
456
+ when :currency_symbol; node << UP::Symbol::Currency.new(token, active_opts)
457
+ when :modifier_symbol; node << UP::Symbol::Modifier.new(token, active_opts)
458
+ when :other_symbol; node << UP::Symbol::Other.new(token, active_opts)
459
+
460
+ when :other; node << UP::Codepoint::Any.new(token, active_opts)
461
+ when :control; node << UP::Codepoint::Control.new(token, active_opts)
462
+ when :format; node << UP::Codepoint::Format.new(token, active_opts)
463
+ when :surrogate; node << UP::Codepoint::Surrogate.new(token, active_opts)
464
+ when :private_use; node << UP::Codepoint::PrivateUse.new(token, active_opts)
465
+ when :unassigned; node << UP::Codepoint::Unassigned.new(token, active_opts)
466
+
467
+ when *UPTokens::Age; node << UP::Age.new(token, active_opts)
468
+ when *UPTokens::Derived; node << UP::Derived.new(token, active_opts)
469
+ when *UPTokens::Emoji; node << UP::Emoji.new(token, active_opts)
470
+ when *UPTokens::Enumerated; node << UP::Enumerated.new(token, active_opts)
471
+ when *UPTokens::Script; node << UP::Script.new(token, active_opts)
472
+ when *UPTokens::UnicodeBlock; node << UP::Block.new(token, active_opts)
474
473
 
475
474
  else
476
475
  raise UnknownTokenError.new('UnicodeProperty', token)
@@ -478,8 +477,7 @@ class Regexp::Parser
478
477
  end
479
478
 
480
479
  def quantifier(token)
481
- target_node = node.expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
482
- target_node or raise ParserError, "No valid target found for '#{token.text}'"
480
+ target_node = node.extract_quantifier_target(token.text)
483
481
 
484
482
  # in case of chained quantifiers, wrap target in an implicit passive group
485
483
  # description of the problem: https://github.com/ammar/regexp_parser/issues/3
@@ -527,6 +525,8 @@ class Regexp::Parser
527
525
  end
528
526
 
529
527
  def open_set(token)
528
+ # TODO: this and Quantifier are the only cases where Expression#token
529
+ # does not match the scanner/lexer output. Fix in v3.0.0.
530
530
  token.token = :character
531
531
  nest(CharacterSet.new(token, active_opts))
532
532
  end
@@ -541,7 +541,7 @@ class Regexp::Parser
541
541
 
542
542
  def range(token)
543
543
  exp = CharacterSet::Range.new(token, active_opts)
544
- scope = node.last.is_a?(CharacterSet::IntersectedSequence) ? node.last : node
544
+ scope = node.last.instance_of?(CharacterSet::IntersectedSequence) ? node.last : node
545
545
  exp << scope.expressions.pop
546
546
  nest(exp)
547
547
  end
@@ -568,28 +568,29 @@ class Regexp::Parser
568
568
  end
569
569
 
570
570
  def close_completed_character_set_range
571
- decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
571
+ decrease_nesting if node.instance_of?(CharacterSet::Range) && node.complete?
572
572
  end
573
573
 
574
574
  def active_opts
575
575
  options_stack.last
576
576
  end
577
577
 
578
- # Assigns referenced expressions to refering expressions, e.g. if there is
578
+ # Assigns referenced expressions to referring expressions, e.g. if there is
579
579
  # an instance of Backreference::Number, its #referenced_expression is set to
580
580
  # the instance of Group::Capture that it refers to via its number.
581
581
  def assign_referenced_expressions
582
- # find all referencable expressions
582
+ # find all referenceable and referring expressions
583
583
  targets = { 0 => root }
584
+ referrers = []
584
585
  root.each_expression do |exp|
585
586
  exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
587
+ referrers << exp if exp.referential?
586
588
  end
587
- # assign them to any refering expressions
588
- root.each_expression do |exp|
589
- next unless exp.respond_to?(:reference)
590
-
589
+ # assign reference expression to referring expressions
590
+ # (in a second iteration because there might be forward references)
591
+ referrers.each do |exp|
591
592
  exp.referenced_expression = targets[exp.reference] ||
592
- raise(ParserError, "Invalid reference: #{exp.reference}")
593
+ raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
593
594
  end
594
595
  end
595
596
  end # module Regexp::Parser
@@ -0,0 +1,8 @@
1
+ class Regexp::Scanner
2
+ # Unexpected end of pattern
3
+ class PrematureEndError < ScannerError
4
+ def initialize(where = '')
5
+ super "Premature end of pattern at #{where}"
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,6 @@
1
+ require_relative '../../../regexp_parser/error'
2
+
3
+ class Regexp::Scanner
4
+ # General scanner error (catch all)
5
+ class ScannerError < Regexp::Parser::Error; end
6
+ end
@@ -0,0 +1,63 @@
1
+ class Regexp::Scanner
2
+ # Base for all scanner validation errors
3
+ class ValidationError < ScannerError
4
+ # Centralizes and unifies the handling of validation related errors.
5
+ def self.for(type, problem, reason = nil)
6
+ types.fetch(type).new(problem, reason)
7
+ end
8
+
9
+ def self.types
10
+ @types ||= {
11
+ backref: InvalidBackrefError,
12
+ group: InvalidGroupError,
13
+ group_option: InvalidGroupOption,
14
+ posix_class: UnknownPosixClassError,
15
+ property: UnknownUnicodePropertyError,
16
+ sequence: InvalidSequenceError,
17
+ }
18
+ end
19
+ end
20
+
21
+ # Invalid sequence format. Used for escape sequences, mainly.
22
+ class InvalidSequenceError < ValidationError
23
+ def initialize(what = 'sequence', where = '')
24
+ super "Invalid #{what} at #{where}"
25
+ end
26
+ end
27
+
28
+ # Invalid group. Used for named groups.
29
+ class InvalidGroupError < ValidationError
30
+ def initialize(what, reason)
31
+ super "Invalid #{what}, #{reason}."
32
+ end
33
+ end
34
+
35
+ # Invalid group option. Used for inline options.
36
+ # TODO: should become InvalidGroupOptionError in v3.0.0 for consistency
37
+ class InvalidGroupOption < ValidationError
38
+ def initialize(option, text)
39
+ super "Invalid group option #{option} in #{text}"
40
+ end
41
+ end
42
+
43
+ # Invalid back reference. Used for name and number refs/calls.
44
+ class InvalidBackrefError < ValidationError
45
+ def initialize(what, reason)
46
+ super "Invalid back reference #{what}, #{reason}"
47
+ end
48
+ end
49
+
50
+ # The property name was not recognized by the scanner.
51
+ class UnknownUnicodePropertyError < ValidationError
52
+ def initialize(name, _)
53
+ super "Unknown unicode character property name #{name}"
54
+ end
55
+ end
56
+
57
+ # The POSIX class name was not recognized by the scanner.
58
+ class UnknownPosixClassError < ValidationError
59
+ def initialize(text, _)
60
+ super "Unknown POSIX class #{text}"
61
+ end
62
+ end
63
+ end
@@ -7,6 +7,8 @@ age=12.0,age=12.0
7
7
  age=12.1,age=12.1
8
8
  age=13.0,age=13.0
9
9
  age=14.0,age=14.0
10
+ age=15.0,age=15.0
11
+ age=15.1,age=15.1
10
12
  age=2.0,age=2.0
11
13
  age=2.1,age=2.1
12
14
  age=3.0,age=3.0
@@ -97,6 +99,7 @@ emojimodifierbase,emoji_modifier_base
97
99
  emojipresentation,emoji_presentation
98
100
  enclosingmark,enclosing_mark
99
101
  ethiopic,ethiopic
102
+ extendedpictographic,extended_pictographic
100
103
  extender,extender
101
104
  finalpunctuation,final_punctuation
102
105
  format,format
@@ -106,6 +109,19 @@ gothic,gothic
106
109
  grantha,grantha
107
110
  graph,graph
108
111
  graphemebase,grapheme_base
112
+ graphemeclusterbreak=control,grapheme_cluster_break=control
113
+ graphemeclusterbreak=cr,grapheme_cluster_break=cr
114
+ graphemeclusterbreak=extend,grapheme_cluster_break=extend
115
+ graphemeclusterbreak=l,grapheme_cluster_break=l
116
+ graphemeclusterbreak=lf,grapheme_cluster_break=lf
117
+ graphemeclusterbreak=lv,grapheme_cluster_break=lv
118
+ graphemeclusterbreak=lvt,grapheme_cluster_break=lvt
119
+ graphemeclusterbreak=prepend,grapheme_cluster_break=prepend
120
+ graphemeclusterbreak=regionalindicator,grapheme_cluster_break=regional_indicator
121
+ graphemeclusterbreak=spacingmark,grapheme_cluster_break=spacingmark
122
+ graphemeclusterbreak=t,grapheme_cluster_break=t
123
+ graphemeclusterbreak=v,grapheme_cluster_break=v
124
+ graphemeclusterbreak=zwj,grapheme_cluster_break=zwj
109
125
  graphemeextend,grapheme_extend
110
126
  graphemelink,grapheme_link
111
127
  greek,greek
@@ -121,11 +137,14 @@ hebrew,hebrew
121
137
  hexdigit,hex_digit
122
138
  hiragana,hiragana
123
139
  hyphen,hyphen
140
+ idcompatmathcontinue,id_compat_math_continue
141
+ idcompatmathstart,id_compat_math_start
124
142
  idcontinue,id_continue
125
143
  ideographic,ideographic
126
144
  idsbinaryoperator,ids_binary_operator
127
145
  idstart,id_start
128
146
  idstrinaryoperator,ids_trinary_operator
147
+ idsunaryoperator,ids_unary_operator
129
148
  imperialaramaic,imperial_aramaic
130
149
  inadlam,in_adlam
131
150
  inaegeannumbers,in_aegean_numbers
@@ -139,6 +158,7 @@ inancientsymbols,in_ancient_symbols
139
158
  inarabic,in_arabic
140
159
  inarabicextendeda,in_arabic_extended_a
141
160
  inarabicextendedb,in_arabic_extended_b
161
+ inarabicextendedc,in_arabic_extended_c
142
162
  inarabicmathematicalalphabeticsymbols,in_arabic_mathematical_alphabetic_symbols
143
163
  inarabicpresentationformsa,in_arabic_presentation_forms_a
144
164
  inarabicpresentationformsb,in_arabic_presentation_forms_b
@@ -186,6 +206,8 @@ incjkunifiedideographsextensiond,in_cjk_unified_ideographs_extension_d
186
206
  incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e
187
207
  incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
188
208
  incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
209
+ incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
210
+ incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
189
211
  incombiningdiacriticalmarks,in_combining_diacritical_marks
190
212
  incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
191
213
  incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
@@ -205,10 +227,12 @@ incyrillic,in_cyrillic
205
227
  incyrillicextendeda,in_cyrillic_extended_a
206
228
  incyrillicextendedb,in_cyrillic_extended_b
207
229
  incyrillicextendedc,in_cyrillic_extended_c
230
+ incyrillicextendedd,in_cyrillic_extended_d
208
231
  incyrillicsupplement,in_cyrillic_supplement
209
232
  indeseret,in_deseret
210
233
  indevanagari,in_devanagari
211
234
  indevanagariextended,in_devanagari_extended
235
+ indevanagariextendeda,in_devanagari_extended_a
212
236
  indingbats,in_dingbats
213
237
  indivesakuru,in_dives_akuru
214
238
  indogra,in_dogra
@@ -268,6 +292,7 @@ inipaextensions,in_ipa_extensions
268
292
  initialpunctuation,initial_punctuation
269
293
  injavanese,in_javanese
270
294
  inkaithi,in_kaithi
295
+ inkaktoviknumerals,in_kaktovik_numerals
271
296
  inkanaextendeda,in_kana_extended_a
272
297
  inkanaextendedb,in_kana_extended_b
273
298
  inkanasupplement,in_kana_supplement
@@ -276,6 +301,7 @@ inkangxiradicals,in_kangxi_radicals
276
301
  inkannada,in_kannada
277
302
  inkatakana,in_katakana
278
303
  inkatakanaphoneticextensions,in_katakana_phonetic_extensions
304
+ inkawi,in_kawi
279
305
  inkayahli,in_kayah_li
280
306
  inkharoshthi,in_kharoshthi
281
307
  inkhitansmallscript,in_khitan_small_script
@@ -339,6 +365,7 @@ inmyanmar,in_myanmar
339
365
  inmyanmarextendeda,in_myanmar_extended_a
340
366
  inmyanmarextendedb,in_myanmar_extended_b
341
367
  innabataean,in_nabataean
368
+ innagmundari,in_nag_mundari
342
369
  innandinagari,in_nandinagari
343
370
  innewa,in_newa
344
371
  innewtailue,in_new_tai_lue
@@ -457,6 +484,7 @@ joincontrol,join_control
457
484
  kaithi,kaithi
458
485
  kannada,kannada
459
486
  katakana,katakana
487
+ kawi,kawi
460
488
  kayahli,kayah_li
461
489
  kharoshthi,kharoshthi
462
490
  khitansmallscript,khitan_small_script
@@ -503,6 +531,7 @@ mro,mro
503
531
  multani,multani
504
532
  myanmar,myanmar
505
533
  nabataean,nabataean
534
+ nagmundari,nag_mundari
506
535
  nandinagari,nandinagari
507
536
  newa,newa
508
537
  newline,newline
@@ -57,6 +57,7 @@ emod,emoji_modifier
57
57
  epres,emoji_presentation
58
58
  ethi,ethiopic
59
59
  ext,extender
60
+ extpict,extended_pictographic
60
61
  geor,georgian
61
62
  glag,glagolitic
62
63
  gong,gunjala_gondi
@@ -85,6 +86,7 @@ ideo,ideographic
85
86
  ids,id_start
86
87
  idsb,ids_binary_operator
87
88
  idst,ids_trinary_operator
89
+ idsu,ids_unary_operator
88
90
  ital,old_italic
89
91
  java,javanese
90
92
  joinc,join_control
@@ -133,6 +135,7 @@ mtei,meetei_mayek
133
135
  mult,multani
134
136
  mymr,myanmar
135
137
  n,number
138
+ nagm,nag_mundari
136
139
  nand,nandinagari
137
140
  narb,old_north_arabian
138
141
  nbat,nabataean
@@ -17,10 +17,10 @@
17
17
  text = copy(data, ts-1, te)
18
18
  type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
19
19
 
20
- name = data[ts+2..te-2].pack('c*').gsub(/[\^\s_\-]/, '').downcase
20
+ name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase
21
21
 
22
22
  token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
23
- validation_error(:property, name) unless token
23
+ raise ValidationError.for(:property, name) unless token
24
24
 
25
25
  self.emit(type, token.to_sym, text)
26
26