regexp_parser 0.5.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +242 -0
  3. data/Gemfile +1 -0
  4. data/README.md +21 -17
  5. data/Rakefile +31 -0
  6. data/lib/regexp_parser/expression.rb +11 -9
  7. data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
  8. data/lib/regexp_parser/expression/classes/backref.rb +21 -16
  9. data/lib/regexp_parser/expression/classes/escape.rb +81 -10
  10. data/lib/regexp_parser/expression/classes/group.rb +20 -20
  11. data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +6 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +10 -93
  14. data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
  15. data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
  16. data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
  17. data/lib/regexp_parser/expression/methods/tests.rb +4 -14
  18. data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +3 -4
  20. data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
  21. data/lib/regexp_parser/expression/subexpression.rb +6 -10
  22. data/lib/regexp_parser/lexer.rb +13 -17
  23. data/lib/regexp_parser/parser.rb +170 -116
  24. data/lib/regexp_parser/scanner.rb +952 -2431
  25. data/lib/regexp_parser/scanner/char_type.rl +31 -0
  26. data/lib/regexp_parser/scanner/properties/long.yml +561 -0
  27. data/lib/regexp_parser/scanner/properties/short.yml +225 -0
  28. data/lib/regexp_parser/scanner/property.rl +7 -806
  29. data/lib/regexp_parser/scanner/scanner.rl +112 -154
  30. data/lib/regexp_parser/syntax/base.rb +4 -4
  31. data/lib/regexp_parser/syntax/tokens.rb +1 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
  34. data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
  35. data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
  36. data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
  38. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
  39. data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
  40. data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
  41. data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
  42. data/lib/regexp_parser/version.rb +1 -1
  43. data/regexp_parser.gemspec +2 -1
  44. data/test/expression/test_base.rb +2 -1
  45. data/test/expression/test_clone.rb +0 -57
  46. data/test/expression/test_set.rb +31 -8
  47. data/test/expression/test_strfregexp.rb +13 -4
  48. data/test/expression/test_subexpression.rb +25 -0
  49. data/test/expression/test_traverse.rb +25 -25
  50. data/test/helpers.rb +1 -0
  51. data/test/lexer/test_all.rb +1 -1
  52. data/test/lexer/test_conditionals.rb +9 -7
  53. data/test/lexer/test_nesting.rb +39 -21
  54. data/test/lexer/test_refcalls.rb +4 -4
  55. data/test/parser/set/test_intersections.rb +127 -0
  56. data/test/parser/set/test_ranges.rb +111 -0
  57. data/test/parser/test_all.rb +4 -1
  58. data/test/parser/test_escapes.rb +41 -9
  59. data/test/parser/test_groups.rb +22 -3
  60. data/test/parser/test_posix_classes.rb +27 -0
  61. data/test/parser/test_properties.rb +17 -290
  62. data/test/parser/test_refcalls.rb +66 -26
  63. data/test/parser/test_sets.rb +132 -129
  64. data/test/scanner/test_all.rb +1 -7
  65. data/test/scanner/test_conditionals.rb +16 -16
  66. data/test/scanner/test_errors.rb +0 -30
  67. data/test/scanner/test_escapes.rb +1 -2
  68. data/test/scanner/test_free_space.rb +28 -28
  69. data/test/scanner/test_groups.rb +35 -35
  70. data/test/scanner/test_meta.rb +1 -1
  71. data/test/scanner/test_properties.rb +87 -114
  72. data/test/scanner/test_refcalls.rb +18 -18
  73. data/test/scanner/test_scripts.rb +19 -351
  74. data/test/scanner/test_sets.rb +87 -60
  75. data/test/scanner/test_unicode_blocks.rb +4 -105
  76. data/test/support/warning_extractor.rb +1 -1
  77. data/test/syntax/test_syntax.rb +7 -0
  78. data/test/syntax/versions/test_1.8.rb +2 -4
  79. metadata +17 -7
  80. data/ChangeLog +0 -325
  81. data/test/scanner/test_emojis.rb +0 -31
@@ -67,8 +67,8 @@ module Regexp::Syntax
67
67
  [:backref, :name_ref]
68
68
  when :name_call_ab, :name_call_sq
69
69
  [:backref, :name_call]
70
- when :name_nest_ref_ab, :name_nest_ref_sq
71
- [:backref, :name_nest_ref]
70
+ when :name_recursion_ref_ab, :name_recursion_ref_sq
71
+ [:backref, :name_recursion_ref]
72
72
  when :number_ref_ab, :number_ref_sq
73
73
  [:backref, :number_ref]
74
74
  when :number_call_ab, :number_call_sq
@@ -77,8 +77,8 @@ module Regexp::Syntax
77
77
  [:backref, :number_rel_ref]
78
78
  when :number_rel_call_ab, :number_rel_call_sq
79
79
  [:backref, :number_rel_call]
80
- when :number_nest_ref_ab, :number_nest_ref_sq
81
- [:backref, :number_nest_ref]
80
+ when :number_recursion_ref_ab, :number_recursion_ref_sq
81
+ [:backref, :number_recursion_ref]
82
82
  else
83
83
  [type, token]
84
84
  end
@@ -23,6 +23,7 @@ end
23
23
  require 'regexp_parser/syntax/tokens/anchor'
24
24
  require 'regexp_parser/syntax/tokens/assertion'
25
25
  require 'regexp_parser/syntax/tokens/backref'
26
+ require 'regexp_parser/syntax/tokens/posix_class'
26
27
  require 'regexp_parser/syntax/tokens/character_set'
27
28
  require 'regexp_parser/syntax/tokens/character_type'
28
29
  require 'regexp_parser/syntax/tokens/conditional'
@@ -5,9 +5,9 @@ module Regexp::Syntax
5
5
  Name = [:name_ref]
6
6
  Number = [:number, :number_ref, :number_rel_ref]
7
7
 
8
- NestLevel = [:name_nest_ref, :number_nest_ref]
8
+ RecursionLevel = [:name_recursion_ref, :number_recursion_ref]
9
9
 
10
- All = Name + Number + NestLevel
10
+ All = Name + Number + RecursionLevel
11
11
  Type = :backref
12
12
  end
13
13
 
@@ -4,48 +4,13 @@ module Regexp::Syntax
4
4
  module CharacterSet
5
5
  OpenClose = [:open, :close]
6
6
 
7
- Basic = [:negate, :member, :range]
8
- Extended = Basic + [:escape, :intersection, :backspace,
9
- :member_hex, :range_hex]
7
+ Basic = [:negate, :range]
8
+ Extended = Basic + [:intersection, :backspace]
10
9
 
11
- Types = [:type_digit, :type_nondigit, :type_hex, :type_nonhex,
12
- :type_space, :type_nonspace, :type_word, :type_nonword]
13
-
14
- Clustered = [:type_linebreak, :type_xgrapheme]
15
-
16
- module POSIX
17
- Standard = [
18
- :class_alnum, :class_alpha, :class_blank, :class_cntrl,
19
- :class_digit, :class_graph, :class_lower, :class_print,
20
- :class_punct, :class_space, :class_upper, :class_xdigit,
21
- ]
22
-
23
- StandardNegative = [
24
- :class_nonalnum, :class_nonalpha, :class_nonblank,
25
- :class_noncntrl, :class_nondigit, :class_nongraph,
26
- :class_nonlower, :class_nonprint, :class_nonpunct,
27
- :class_nonspace, :class_nonupper, :class_nonxdigit,
28
- ]
29
-
30
- Extensions = [:class_ascii, :class_word]
31
- ExtensionsNegative = [:class_nonascii, :class_nonword]
32
-
33
- All = Standard + StandardNegative + Extensions + ExtensionsNegative
34
- end
35
-
36
- All = Basic + Extended + Types + Clustered + POSIX::All
10
+ All = Basic + Extended
37
11
  Type = :set
38
-
39
- module SubSet
40
- OpenClose = [:open, :close]
41
-
42
- All = CharacterSet::All
43
- Type = :subset
44
- end
45
12
  end
46
13
 
47
14
  Map[CharacterSet::Type] = CharacterSet::All
48
- Map[CharacterSet::SubSet::Type] = CharacterSet::All
49
-
50
15
  end
51
16
  end
@@ -9,7 +9,7 @@ module Regexp::Syntax
9
9
  Control = [:control, :meta_sequence]
10
10
 
11
11
  ASCII = [:bell, :backspace, :escape, :form_feed, :newline, :carriage,
12
- :space, :tab, :vertical_tab]
12
+ :tab, :vertical_tab]
13
13
 
14
14
  Unicode = [:codepoint, :codepoint_list]
15
15
 
@@ -18,8 +18,7 @@ module Regexp::Syntax
18
18
  :bol, :eol,
19
19
  :group_open, :group_close,
20
20
  :interval_open, :interval_close,
21
- :set_open, :set_close,
22
- :baclslash]
21
+ :set_open, :set_close]
23
22
 
24
23
  Hex = [:hex]
25
24
 
@@ -3,18 +3,19 @@ module Regexp::Syntax
3
3
 
4
4
  module Group
5
5
  Basic = [:capture, :close]
6
- Extended = Basic + [:options]
6
+ Extended = Basic + [:options, :options_switch]
7
7
 
8
8
  Named = [:named]
9
9
  Atomic = [:atomic]
10
10
  Passive = [:passive]
11
11
  Comment = [:comment]
12
12
 
13
- All = Group::Extended + Group::Named + Group::Atomic +
14
- Group::Passive + Group::Comment
13
+ V1_8_6 = Group::Extended + Group::Named + Group::Atomic +
14
+ Group::Passive + Group::Comment
15
15
 
16
- Absence = [:absence]
16
+ V2_4_1 = [:absence]
17
17
 
18
+ All = V1_8_6 + V2_4_1
18
19
  Type = :group
19
20
  end
20
21
 
@@ -1,16 +1,16 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
- module CharacterClass
3
+ module PosixClass
4
4
  Standard = [:alnum, :alpha, :blank, :cntrl, :digit, :graph,
5
5
  :lower, :print, :punct, :space, :upper, :xdigit]
6
6
 
7
7
  Extensions = [:ascii, :word]
8
8
 
9
9
  All = Standard + Extensions
10
- Type = :class
11
- NonType = :nonclass
10
+ Type = :posixclass
11
+ NonType = :nonposixclass
12
12
  end
13
- Map[CharacterClass::Type] = CharacterClass::All
14
- Map[CharacterClass::NonType] = CharacterClass::All
13
+ Map[PosixClass::Type] = PosixClass::All
14
+ Map[PosixClass::NonType] = PosixClass::All
15
15
  end
16
16
  end
@@ -10,24 +10,24 @@ module Regexp::Syntax
10
10
  POSIX = [:any, :assigned, :newline]
11
11
 
12
12
  module Category
13
- Letter = [:letter_any, :letter_uppercase, :letter_lowercase,
14
- :letter_titlecase, :letter_modifier, :letter_other]
13
+ Letter = [:letter, :uppercase_letter, :lowercase_letter,
14
+ :titlecase_letter, :modifier_letter, :other_letter]
15
15
 
16
- Mark = [:mark_any, :mark_nonspacing, :mark_spacing,
17
- :mark_enclosing]
16
+ Mark = [:mark, :nonspacing_mark, :spacing_mark,
17
+ :enclosing_mark]
18
18
 
19
- Number = [:number_any, :number_decimal, :number_letter,
20
- :number_other]
19
+ Number = [:number, :decimal_number, :letter_number,
20
+ :other_number]
21
21
 
22
- Punctuation = [:punct_any, :punct_connector, :punct_dash,
23
- :punct_open, :punct_close, :punct_initial,
24
- :punct_final, :punct_other]
22
+ Punctuation = [:punctuation, :connector_punctuation, :dash_punctuation,
23
+ :open_punctuation, :close_punctuation, :initial_punctuation,
24
+ :final_punctuation, :other_punctuation]
25
25
 
26
- Symbol = [:symbol_any, :symbol_math, :symbol_currency,
27
- :symbol_modifier, :symbol_other]
26
+ Symbol = [:symbol, :math_symbol, :currency_symbol,
27
+ :modifier_symbol, :other_symbol]
28
28
 
29
- Separator = [:separator_any, :separator_space, :separator_line,
30
- :separator_para]
29
+ Separator = [:separator, :space_separator, :line_separator,
30
+ :paragraph_separator]
31
31
 
32
32
  Codepoint = [:other, :control, :format,
33
33
  :surrogate, :private_use, :unassigned]
@@ -37,25 +37,25 @@ module Regexp::Syntax
37
37
  end
38
38
 
39
39
  # As of ruby version 1.9.3
40
- Age_V1_9_3 = [:age_1_1, :age_2_0, :age_2_1, :age_3_0, :age_3_1,
41
- :age_3_2, :age_4_0, :age_4_1, :age_5_0, :age_5_1,
42
- :age_5_2, :age_6_0]
40
+ Age_V1_9_3 = [:'age=1.1', :'age=2.0', :'age=2.1', :'age=3.0', :'age=3.1',
41
+ :'age=3.2', :'age=4.0', :'age=4.1', :'age=5.0', :'age=5.1',
42
+ :'age=5.2', :'age=6.0']
43
43
 
44
- Age_V2_0_0 = [:age_6_1]
44
+ Age_V2_0_0 = [:'age=6.1']
45
45
 
46
46
  # These were merged (from Onigmo) in the branch for 2.2.0
47
- Age_V2_2_0 = [:age_6_2, :age_6_3, :age_7_0]
47
+ Age_V2_2_0 = [:'age=6.2', :'age=6.3', :'age=7.0']
48
48
 
49
- Age_V2_3_0 = [:age_8_0]
49
+ Age_V2_3_0 = [:'age=8.0']
50
50
 
51
- Age_V2_4_0 = [:age_9_0]
51
+ Age_V2_4_0 = [:'age=9.0']
52
52
 
53
- Age_V2_5_0 = [:age_10_0]
53
+ Age_V2_5_0 = [:'age=10.0']
54
54
 
55
55
  Age = Age_V1_9_3 + Age_V2_0_0 + Age_V2_2_0 + Age_V2_3_0 + Age_V2_4_0 + Age_V2_5_0
56
56
 
57
57
  Derived_V1_9_0 = [
58
- :ascii_hex,
58
+ :ascii_hex_digit,
59
59
  :alphabetic,
60
60
  :cased,
61
61
  :changes_when_casefolded,
@@ -67,7 +67,7 @@ module Regexp::Syntax
67
67
  :bidi_control,
68
68
  :dash,
69
69
  :deprecated,
70
- :default_ignorable_cp,
70
+ :default_ignorable_code_point,
71
71
  :diacritic,
72
72
  :extender,
73
73
  :grapheme_base,
@@ -78,297 +78,550 @@ module Regexp::Syntax
78
78
  :id_continue,
79
79
  :ideographic,
80
80
  :id_start,
81
- :ids_binary_op,
82
- :ids_trinary_op,
81
+ :ids_binary_operator,
82
+ :ids_trinary_operator,
83
83
  :join_control,
84
84
  :logical_order_exception,
85
85
  :lowercase,
86
86
  :math,
87
- :non_character_cp,
87
+ :noncharacter_code_point,
88
88
  :other_alphabetic,
89
- :other_default_ignorable_cp,
90
- :other_grapheme_extended,
89
+ :other_default_ignorable_code_point,
90
+ :other_grapheme_extend,
91
91
  :other_id_continue,
92
92
  :other_id_start,
93
93
  :other_lowercase,
94
94
  :other_math,
95
95
  :other_uppercase,
96
96
  :pattern_syntax,
97
- :pattern_whitespace,
97
+ :pattern_white_space,
98
98
  :quotation_mark,
99
99
  :radical,
100
- :soft_dotted,
101
100
  :sentence_terminal,
101
+ :soft_dotted,
102
102
  :terminal_punctuation,
103
103
  :unified_ideograph,
104
104
  :uppercase,
105
105
  :variation_selector,
106
- :whitespace,
106
+ :white_space,
107
107
  :xid_start,
108
108
  :xid_continue,
109
109
  ]
110
110
 
111
+ Derived_V2_0_0 = [
112
+ :cased_letter,
113
+ :combining_mark,
114
+ ]
115
+
116
+ Derived_V2_4_0 = [
117
+ :prepended_concatenation_mark,
118
+ ]
119
+
111
120
  Derived_V2_5_0 = [
112
121
  :regional_indicator
113
122
  ]
114
123
 
115
- Derived = Derived_V1_9_0 + Derived_V2_5_0
124
+ Derived = Derived_V1_9_0 + Derived_V2_0_0 + Derived_V2_4_0 + Derived_V2_5_0
116
125
 
117
126
  Script_V1_9_0 = [
118
- :script_arabic,
119
- :script_imperial_aramaic,
120
- :script_armenian,
121
- :script_avestan,
122
- :script_balinese,
123
- :script_bamum,
124
- :script_bengali,
125
- :script_bopomofo,
126
- :script_braille,
127
- :script_buginese,
128
- :script_buhid,
129
- :script_canadian_aboriginal,
130
- :script_carian,
131
- :script_cham,
132
- :script_cherokee,
133
- :script_coptic,
134
- :script_cypriot,
135
- :script_cyrillic,
136
- :script_devanagari,
137
- :script_deseret,
138
- :script_egyptian_hieroglyphs,
139
- :script_ethiopic,
140
- :script_georgian,
141
- :script_glagolitic,
142
- :script_gothic,
143
- :script_greek,
144
- :script_gujarati,
145
- :script_gurmukhi,
146
- :script_hangul,
147
- :script_han,
148
- :script_hanunoo,
149
- :script_hebrew,
150
- :script_hiragana,
151
- :script_katakana_or_hiragana,
152
- :script_old_italic,
153
- :script_javanese,
154
- :script_kayah_li,
155
- :script_katakana,
156
- :script_kharoshthi,
157
- :script_khmer,
158
- :script_kannada,
159
- :script_kaithi,
160
- :script_tai_tham,
161
- :script_lao,
162
- :script_latin,
163
- :script_lepcha,
164
- :script_limbu,
165
- :script_linear_b,
166
- :script_lisu,
167
- :script_lycian,
168
- :script_lydian,
169
- :script_malayalam,
170
- :script_mongolian,
171
- :script_meetei_mayek,
172
- :script_myanmar,
173
- :script_nko,
174
- :script_ogham,
175
- :script_ol_chiki,
176
- :script_old_turkic,
177
- :script_oriya,
178
- :script_osmanya,
179
- :script_phags_pa,
180
- :script_inscriptional_pahlavi,
181
- :script_phoenician,
182
- :script_inscriptional_parthian,
183
- :script_rejang,
184
- :script_runic,
185
- :script_samaritan,
186
- :script_old_south_arabian,
187
- :script_saurashtra,
188
- :script_shavian,
189
- :script_sinhala,
190
- :script_sundanese,
191
- :script_syloti_nagri,
192
- :script_syriac,
193
- :script_tagbanwa,
194
- :script_tai_le,
195
- :script_new_tai_lue,
196
- :script_tamil,
197
- :script_tai_viet,
198
- :script_telugu,
199
- :script_tifinagh,
200
- :script_tagalog,
201
- :script_thaana,
202
- :script_thai,
203
- :script_tibetan,
204
- :script_ugaritic,
205
- :script_vai,
206
- :script_old_persian,
207
- :script_cuneiform,
208
- :script_yi,
209
- :script_inherited,
210
- :script_common,
211
- :script_unknown
127
+ :arabic,
128
+ :imperial_aramaic,
129
+ :armenian,
130
+ :avestan,
131
+ :balinese,
132
+ :bamum,
133
+ :bengali,
134
+ :bopomofo,
135
+ :braille,
136
+ :buginese,
137
+ :buhid,
138
+ :canadian_aboriginal,
139
+ :carian,
140
+ :cham,
141
+ :cherokee,
142
+ :coptic,
143
+ :cypriot,
144
+ :cyrillic,
145
+ :devanagari,
146
+ :deseret,
147
+ :egyptian_hieroglyphs,
148
+ :ethiopic,
149
+ :georgian,
150
+ :glagolitic,
151
+ :gothic,
152
+ :greek,
153
+ :gujarati,
154
+ :gurmukhi,
155
+ :hangul,
156
+ :han,
157
+ :hanunoo,
158
+ :hebrew,
159
+ :hiragana,
160
+ :old_italic,
161
+ :javanese,
162
+ :kayah_li,
163
+ :katakana,
164
+ :kharoshthi,
165
+ :khmer,
166
+ :kannada,
167
+ :kaithi,
168
+ :tai_tham,
169
+ :lao,
170
+ :latin,
171
+ :lepcha,
172
+ :limbu,
173
+ :linear_b,
174
+ :lisu,
175
+ :lycian,
176
+ :lydian,
177
+ :malayalam,
178
+ :mongolian,
179
+ :meetei_mayek,
180
+ :myanmar,
181
+ :nko,
182
+ :ogham,
183
+ :ol_chiki,
184
+ :old_turkic,
185
+ :oriya,
186
+ :osmanya,
187
+ :phags_pa,
188
+ :inscriptional_pahlavi,
189
+ :phoenician,
190
+ :inscriptional_parthian,
191
+ :rejang,
192
+ :runic,
193
+ :samaritan,
194
+ :old_south_arabian,
195
+ :saurashtra,
196
+ :shavian,
197
+ :sinhala,
198
+ :sundanese,
199
+ :syloti_nagri,
200
+ :syriac,
201
+ :tagbanwa,
202
+ :tai_le,
203
+ :new_tai_lue,
204
+ :tamil,
205
+ :tai_viet,
206
+ :telugu,
207
+ :tifinagh,
208
+ :tagalog,
209
+ :thaana,
210
+ :thai,
211
+ :tibetan,
212
+ :ugaritic,
213
+ :vai,
214
+ :old_persian,
215
+ :cuneiform,
216
+ :yi,
217
+ :inherited,
218
+ :common,
219
+ :unknown
220
+ ]
221
+
222
+ Script_V1_9_3 = [
223
+ :brahmi,
224
+ :batak,
225
+ :mandaic
212
226
  ]
213
227
 
214
- Script_V1_9_3 = [:script_brahmi, :script_batak, :script_mandaic]
228
+ Script_V2_0_0 = [
229
+ :chakma,
230
+ :meroitic_cursive,
231
+ :meroitic_hieroglyphs,
232
+ :miao,
233
+ :sharada,
234
+ :sora_sompeng,
235
+ :takri,
236
+ ]
215
237
 
216
238
  Script_V2_2_0 = [
217
- :script_caucasian_albanian,
218
- :script_bassa_vah,
219
- :script_duployan,
220
- :script_elbasan,
221
- :script_grantha,
222
- :script_pahawh_hmong,
223
- :script_khojki,
224
- :script_linear_a,
225
- :script_mahajani,
226
- :script_manichaean,
227
- :script_mende_kikakui,
228
- :script_modi,
229
- :script_mro,
230
- :script_old_north_arabian,
231
- :script_nabataean,
232
- :script_palmyrene,
233
- :script_pau_cin_hau,
234
- :script_old_permic,
235
- :script_psalter_pahlavi,
236
- :script_siddham,
237
- :script_khudawadi,
238
- :script_tirhuta,
239
- :script_warang_citi
239
+ :caucasian_albanian,
240
+ :bassa_vah,
241
+ :duployan,
242
+ :elbasan,
243
+ :grantha,
244
+ :pahawh_hmong,
245
+ :khojki,
246
+ :linear_a,
247
+ :mahajani,
248
+ :manichaean,
249
+ :mende_kikakui,
250
+ :modi,
251
+ :mro,
252
+ :old_north_arabian,
253
+ :nabataean,
254
+ :palmyrene,
255
+ :pau_cin_hau,
256
+ :old_permic,
257
+ :psalter_pahlavi,
258
+ :siddham,
259
+ :khudawadi,
260
+ :tirhuta,
261
+ :warang_citi
262
+ ]
263
+
264
+ Script_V2_3_0 = [
265
+ :ahom,
266
+ :anatolian_hieroglyphs,
267
+ :hatran,
268
+ :multani,
269
+ :old_hungarian,
270
+ :signwriting,
240
271
  ]
241
272
 
242
- Script = Script_V1_9_0 + Script_V1_9_3 + Script_V2_2_0
243
-
244
- UnicodeBlock = [
245
- :block_inalphabetic_presentation_forms,
246
- :block_inarabic_presentation_forms_a,
247
- :block_inarabic_presentation_forms_b,
248
- :block_inarabic,
249
- :block_inarmenian,
250
- :block_inarrows,
251
- :block_inbasic_latin,
252
- :block_inbengali,
253
- :block_inblock_elements,
254
- :block_inbopomofo_extended,
255
- :block_inbopomofo,
256
- :block_inbox_drawing,
257
- :block_inbraille_patterns,
258
- :block_inbuhid,
259
- :block_incjk_compatibility_forms,
260
- :block_incjk_compatibility_ideographs,
261
- :block_incjk_compatibility,
262
- :block_incjk_radicals_supplement,
263
- :block_incjk_symbols_and_punctuation,
264
- :block_incjk_unified_ideographs_extension_a,
265
- :block_incjk_unified_ideographs,
266
- :block_incherokee,
267
- :block_incombining_diacritical_marks_for_symbols,
268
- :block_incombining_diacritical_marks,
269
- :block_incombining_half_marks,
270
- :block_incontrol_pictures,
271
- :block_incurrency_symbols,
272
- :block_incyrillic_supplement,
273
- :block_incyrillic,
274
- :block_indevanagari,
275
- :block_indingbats,
276
- :block_inenclosed_alphanumerics,
277
- :block_inenclosed_cjk_letters_and_months,
278
- :block_inethiopic,
279
- :block_ingeneral_punctuation,
280
- :block_ingeometric_shapes,
281
- :block_ingeorgian,
282
- :block_ingreek_extended,
283
- :block_ingreek_and_coptic,
284
- :block_ingujarati,
285
- :block_ingurmukhi,
286
- :block_inhalfwidth_and_fullwidth_forms,
287
- :block_inhangul_compatibility_jamo,
288
- :block_inhangul_jamo,
289
- :block_inhangul_syllables,
290
- :block_inhanunoo,
291
- :block_inhebrew,
292
- :block_inhigh_private_use_surrogates,
293
- :block_inhigh_surrogates,
294
- :block_inhiragana,
295
- :block_inipa_extensions,
296
- :block_inideographic_description_characters,
297
- :block_inkanbun,
298
- :block_inkangxi_radicals,
299
- :block_inkannada,
300
- :block_inkatakana_phonetic_extensions,
301
- :block_inkatakana,
302
- :block_inkhmer_symbols,
303
- :block_inkhmer,
304
- :block_inlao,
305
- :block_inlatin_1_supplement,
306
- :block_inlatin_extended_a,
307
- :block_inlatin_extended_b,
308
- :block_inlatin_extended_additional,
309
- :block_inletterlike_symbols,
310
- :block_inlimbu,
311
- :block_inlow_surrogates,
312
- :block_inmalayalam,
313
- :block_inmathematical_operators,
314
- :block_inmiscellaneous_mathematical_symbols_a,
315
- :block_inmiscellaneous_mathematical_symbols_b,
316
- :block_inmiscellaneous_symbols_and_arrows,
317
- :block_inmiscellaneous_symbols,
318
- :block_inmiscellaneous_technical,
319
- :block_inmongolian,
320
- :block_inmyanmar,
321
- :block_innumber_forms,
322
- :block_inogham,
323
- :block_inoptical_character_recognition,
324
- :block_inoriya,
325
- :block_inphonetic_extensions,
326
- :block_inprivate_use_area,
327
- :block_inrunic,
328
- :block_insinhala,
329
- :block_insmall_form_variants,
330
- :block_inspacing_modifier_letters,
331
- :block_inspecials,
332
- :block_insuperscripts_and_subscripts,
333
- :block_insupplemental_arrows_a,
334
- :block_insupplemental_arrows_b,
335
- :block_insupplemental_mathematical_operators,
336
- :block_insyriac,
337
- :block_intagalog,
338
- :block_intagbanwa,
339
- :block_intai_le,
340
- :block_intamil,
341
- :block_intelugu,
342
- :block_inthaana,
343
- :block_inthai,
344
- :block_intibetan,
345
- :block_inunified_canadian_aboriginal_syllabics,
346
- :block_invariation_selectors,
347
- :block_inyi_radicals,
348
- :block_inyi_syllables,
349
- :block_inyijing_hexagram_symbols,
273
+ Script_V2_4_0 = [
274
+ :adlam,
275
+ :bhaiksuki,
276
+ :marchen,
277
+ :newa,
278
+ :osage,
279
+ :tangut,
350
280
  ]
351
281
 
352
- Emoji = [
353
- :emoji_any,
282
+ Script_V2_5_0 = [
283
+ :masaram_gondi,
284
+ :nushu,
285
+ :soyombo,
286
+ :zanabazar_square,
287
+ ]
288
+
289
+ Script = Script_V1_9_0 + Script_V1_9_3 + Script_V2_0_0 +
290
+ Script_V2_2_0 + Script_V2_3_0 + Script_V2_4_0 + Script_V2_5_0
291
+
292
+ UnicodeBlock_V1_9_0 = [
293
+ :in_alphabetic_presentation_forms,
294
+ :in_arabic_presentation_forms_a,
295
+ :in_arabic_presentation_forms_b,
296
+ :in_arabic,
297
+ :in_armenian,
298
+ :in_arrows,
299
+ :in_basic_latin,
300
+ :in_bengali,
301
+ :in_block_elements,
302
+ :in_bopomofo_extended,
303
+ :in_bopomofo,
304
+ :in_box_drawing,
305
+ :in_braille_patterns,
306
+ :in_buhid,
307
+ :in_cjk_compatibility_forms,
308
+ :in_cjk_compatibility_ideographs,
309
+ :in_cjk_compatibility,
310
+ :in_cjk_radicals_supplement,
311
+ :in_cjk_symbols_and_punctuation,
312
+ :in_cjk_unified_ideographs_extension_a,
313
+ :in_cjk_unified_ideographs,
314
+ :in_cherokee,
315
+ :in_combining_diacritical_marks_for_symbols,
316
+ :in_combining_diacritical_marks,
317
+ :in_combining_half_marks,
318
+ :in_control_pictures,
319
+ :in_currency_symbols,
320
+ :in_cyrillic_supplement,
321
+ :in_cyrillic,
322
+ :in_devanagari,
323
+ :in_dingbats,
324
+ :in_enclosed_alphanumerics,
325
+ :in_enclosed_cjk_letters_and_months,
326
+ :in_ethiopic,
327
+ :in_general_punctuation,
328
+ :in_geometric_shapes,
329
+ :in_georgian,
330
+ :in_greek_extended,
331
+ :in_greek_and_coptic,
332
+ :in_gujarati,
333
+ :in_gurmukhi,
334
+ :in_halfwidth_and_fullwidth_forms,
335
+ :in_hangul_compatibility_jamo,
336
+ :in_hangul_jamo,
337
+ :in_hangul_syllables,
338
+ :in_hanunoo,
339
+ :in_hebrew,
340
+ :in_high_private_use_surrogates,
341
+ :in_high_surrogates,
342
+ :in_hiragana,
343
+ :in_ipa_extensions,
344
+ :in_ideographic_description_characters,
345
+ :in_kanbun,
346
+ :in_kangxi_radicals,
347
+ :in_kannada,
348
+ :in_katakana_phonetic_extensions,
349
+ :in_katakana,
350
+ :in_khmer_symbols,
351
+ :in_khmer,
352
+ :in_lao,
353
+ :in_latin_1_supplement,
354
+ :in_latin_extended_a,
355
+ :in_latin_extended_b,
356
+ :in_latin_extended_additional,
357
+ :in_letterlike_symbols,
358
+ :in_limbu,
359
+ :in_low_surrogates,
360
+ :in_malayalam,
361
+ :in_mathematical_operators,
362
+ :in_miscellaneous_mathematical_symbols_a,
363
+ :in_miscellaneous_mathematical_symbols_b,
364
+ :in_miscellaneous_symbols_and_arrows,
365
+ :in_miscellaneous_symbols,
366
+ :in_miscellaneous_technical,
367
+ :in_mongolian,
368
+ :in_myanmar,
369
+ :in_number_forms,
370
+ :in_ogham,
371
+ :in_optical_character_recognition,
372
+ :in_oriya,
373
+ :in_phonetic_extensions,
374
+ :in_private_use_area,
375
+ :in_runic,
376
+ :in_sinhala,
377
+ :in_small_form_variants,
378
+ :in_spacing_modifier_letters,
379
+ :in_specials,
380
+ :in_superscripts_and_subscripts,
381
+ :in_supplemental_arrows_a,
382
+ :in_supplemental_arrows_b,
383
+ :in_supplemental_mathematical_operators,
384
+ :in_syriac,
385
+ :in_tagalog,
386
+ :in_tagbanwa,
387
+ :in_tai_le,
388
+ :in_tamil,
389
+ :in_telugu,
390
+ :in_thaana,
391
+ :in_thai,
392
+ :in_tibetan,
393
+ :in_unified_canadian_aboriginal_syllabics,
394
+ :in_variation_selectors,
395
+ :in_yi_radicals,
396
+ :in_yi_syllables,
397
+ :in_yijing_hexagram_symbols,
398
+ ]
399
+
400
+ UnicodeBlock_V2_0_0 = [
401
+ :in_aegean_numbers,
402
+ :in_alchemical_symbols,
403
+ :in_ancient_greek_musical_notation,
404
+ :in_ancient_greek_numbers,
405
+ :in_ancient_symbols,
406
+ :in_arabic_extended_a,
407
+ :in_arabic_mathematical_alphabetic_symbols,
408
+ :in_arabic_presentation_forms_a,
409
+ :in_arabic_presentation_forms_b,
410
+ :in_arabic_supplement,
411
+ :in_avestan,
412
+ :in_balinese,
413
+ :in_bamum,
414
+ :in_bamum_supplement,
415
+ :in_batak,
416
+ :in_brahmi,
417
+ :in_buginese,
418
+ :in_byzantine_musical_symbols,
419
+ :in_cjk_compatibility_ideographs_supplement,
420
+ :in_cjk_strokes,
421
+ :in_cjk_unified_ideographs_extension_b,
422
+ :in_cjk_unified_ideographs_extension_c,
423
+ :in_cjk_unified_ideographs_extension_d,
424
+ :in_carian,
425
+ :in_chakma,
426
+ :in_cham,
427
+ :in_combining_diacritical_marks_supplement,
428
+ :in_common_indic_number_forms,
429
+ :in_coptic,
430
+ :in_counting_rod_numerals,
431
+ :in_cuneiform,
432
+ :in_cuneiform_numbers_and_punctuation,
433
+ :in_cypriot_syllabary,
434
+ :in_cyrillic_extended_a,
435
+ :in_cyrillic_extended_b,
436
+ :in_deseret,
437
+ :in_devanagari_extended,
438
+ :in_domino_tiles,
439
+ :in_egyptian_hieroglyphs,
440
+ :in_emoticons,
441
+ :in_enclosed_alphanumeric_supplement,
442
+ :in_enclosed_ideographic_supplement,
443
+ :in_ethiopic_extended,
444
+ :in_ethiopic_extended_a,
445
+ :in_ethiopic_supplement,
446
+ :in_georgian_supplement,
447
+ :in_glagolitic,
448
+ :in_gothic,
449
+ :in_hangul_jamo_extended_a,
450
+ :in_hangul_jamo_extended_b,
451
+ :in_imperial_aramaic,
452
+ :in_inscriptional_pahlavi,
453
+ :in_inscriptional_parthian,
454
+ :in_javanese,
455
+ :in_kaithi,
456
+ :in_kana_supplement,
457
+ :in_kayah_li,
458
+ :in_kharoshthi,
459
+ :in_latin_1_supplement,
460
+ :in_latin_extended_a,
461
+ :in_latin_extended_b,
462
+ :in_latin_extended_c,
463
+ :in_latin_extended_d,
464
+ :in_lepcha,
465
+ :in_linear_b_ideograms,
466
+ :in_linear_b_syllabary,
467
+ :in_lisu,
468
+ :in_lycian,
469
+ :in_lydian,
470
+ :in_mahjong_tiles,
471
+ :in_mandaic,
472
+ :in_mathematical_alphanumeric_symbols,
473
+ :in_meetei_mayek,
474
+ :in_meetei_mayek_extensions,
475
+ :in_meroitic_cursive,
476
+ :in_meroitic_hieroglyphs,
477
+ :in_miao,
478
+ :in_miscellaneous_mathematical_symbols_a,
479
+ :in_miscellaneous_mathematical_symbols_b,
480
+ :in_miscellaneous_symbols_and_pictographs,
481
+ :in_modifier_tone_letters,
482
+ :in_musical_symbols,
483
+ :in_myanmar_extended_a,
484
+ :in_nko,
485
+ :in_new_tai_lue,
486
+ :in_no_block,
487
+ :in_ol_chiki,
488
+ :in_old_italic,
489
+ :in_old_persian,
490
+ :in_old_south_arabian,
491
+ :in_old_turkic,
492
+ :in_osmanya,
493
+ :in_phags_pa,
494
+ :in_phaistos_disc,
495
+ :in_phoenician,
496
+ :in_phonetic_extensions_supplement,
497
+ :in_playing_cards,
498
+ :in_rejang,
499
+ :in_rumi_numeral_symbols,
500
+ :in_samaritan,
501
+ :in_saurashtra,
502
+ :in_sharada,
503
+ :in_shavian,
504
+ :in_sora_sompeng,
505
+ :in_sundanese,
506
+ :in_sundanese_supplement,
507
+ :in_supplemental_arrows_a,
508
+ :in_supplemental_arrows_b,
509
+ :in_supplemental_punctuation,
510
+ :in_supplementary_private_use_area_a,
511
+ :in_supplementary_private_use_area_b,
512
+ :in_syloti_nagri,
513
+ :in_tags,
514
+ :in_tai_tham,
515
+ :in_tai_viet,
516
+ :in_tai_xuan_jing_symbols,
517
+ :in_takri,
518
+ :in_tifinagh,
519
+ :in_transport_and_map_symbols,
520
+ :in_ugaritic,
521
+ :in_unified_canadian_aboriginal_syllabics_extended,
522
+ :in_vai,
523
+ :in_variation_selectors_supplement,
524
+ :in_vedic_extensions,
525
+ :in_vertical_forms,
526
+ ]
527
+
528
+ UnicodeBlock_V2_2_0 = [
529
+ :in_bassa_vah,
530
+ :in_caucasian_albanian,
531
+ :in_combining_diacritical_marks_extended,
532
+ :in_coptic_epact_numbers,
533
+ :in_duployan,
534
+ :in_elbasan,
535
+ :in_geometric_shapes_extended,
536
+ :in_grantha,
537
+ :in_khojki,
538
+ :in_khudawadi,
539
+ :in_latin_extended_e,
540
+ :in_linear_a,
541
+ :in_mahajani,
542
+ :in_manichaean,
543
+ :in_mende_kikakui,
544
+ :in_modi,
545
+ :in_mro,
546
+ :in_myanmar_extended_b,
547
+ :in_nabataean,
548
+ :in_old_north_arabian,
549
+ :in_old_permic,
550
+ :in_ornamental_dingbats,
551
+ :in_pahawh_hmong,
552
+ :in_palmyrene,
553
+ :in_pau_cin_hau,
554
+ :in_psalter_pahlavi,
555
+ :in_shorthand_format_controls,
556
+ :in_siddham,
557
+ :in_sinhala_archaic_numbers,
558
+ :in_supplemental_arrows_c,
559
+ :in_tirhuta,
560
+ :in_warang_citi,
561
+ ]
562
+
563
+ UnicodeBlock_V2_3_0 = [
564
+ :in_ahom,
565
+ :in_anatolian_hieroglyphs,
566
+ :in_cjk_unified_ideographs_extension_e,
567
+ :in_cherokee_supplement,
568
+ :in_early_dynastic_cuneiform,
569
+ :in_hatran,
570
+ :in_multani,
571
+ :in_old_hungarian,
572
+ :in_supplemental_symbols_and_pictographs,
573
+ :in_sutton_signwriting,
574
+ ]
575
+
576
+ UnicodeBlock_V2_4_0 = [
577
+ :in_adlam,
578
+ :in_bhaiksuki,
579
+ :in_cyrillic_extended_c,
580
+ :in_glagolitic_supplement,
581
+ :in_ideographic_symbols_and_punctuation,
582
+ :in_marchen,
583
+ :in_mongolian_supplement,
584
+ :in_newa,
585
+ :in_osage,
586
+ :in_tangut,
587
+ :in_tangut_components,
588
+ ]
589
+
590
+ UnicodeBlock_V2_5_0 = [
591
+ :in_cjk_unified_ideographs_extension_f,
592
+ :in_kana_extended_a,
593
+ :in_masaram_gondi,
594
+ :in_nushu,
595
+ :in_soyombo,
596
+ :in_syriac_supplement,
597
+ :in_zanabazar_square,
598
+ ]
599
+
600
+ UnicodeBlock = UnicodeBlock_V1_9_0 + UnicodeBlock_V2_0_0 + UnicodeBlock_V2_2_0 +
601
+ UnicodeBlock_V2_3_0 + UnicodeBlock_V2_4_0 + UnicodeBlock_V2_5_0
602
+
603
+ Emoji_V2_5_0 = [
604
+ :emoji,
354
605
  :emoji_component,
355
606
  :emoji_modifier,
356
607
  :emoji_modifier_base,
357
608
  :emoji_presentation,
358
609
  ]
359
610
 
360
- V1_9_0 = CharType_V1_9_0 + POSIX + Category::All + Derived_V1_9_0 + Script_V1_9_0 + UnicodeBlock
611
+ Emoji = Emoji_V2_5_0
612
+
613
+ V1_9_0 = CharType_V1_9_0 + POSIX + Category::All + Derived_V1_9_0 + Script_V1_9_0 + UnicodeBlock_V1_9_0
361
614
  V1_9_3 = Age_V1_9_3 + Script_V1_9_3
362
615
 
363
- V2_0_0 = Age_V2_0_0
616
+ V2_0_0 = Age_V2_0_0 + Derived_V2_0_0 + Script_V2_0_0 + UnicodeBlock_V2_0_0
364
617
 
365
- V2_2_0 = Age_V2_2_0 + Script_V2_2_0
618
+ V2_2_0 = Age_V2_2_0 + Script_V2_2_0 + UnicodeBlock_V2_2_0
366
619
 
367
- V2_3_0 = Age_V2_3_0
620
+ V2_3_0 = Age_V2_3_0 + Script_V2_3_0 + UnicodeBlock_V2_3_0
368
621
 
369
- V2_4_0 = Age_V2_4_0
622
+ V2_4_0 = Age_V2_4_0 + Derived_V2_4_0 + Script_V2_4_0 + UnicodeBlock_V2_4_0
370
623
 
371
- V2_5_0 = Age_V2_5_0 + CharType_V2_5_0 + Derived_V2_5_0 + Emoji
624
+ V2_5_0 = Age_V2_5_0 + CharType_V2_5_0 + Derived_V2_5_0 + Emoji_V2_5_0 + Script_V2_5_0 + UnicodeBlock_V2_5_0
372
625
 
373
626
  All = V1_9_0 + V1_9_3 + V2_0_0 + V2_2_0 + V2_3_0 + V2_4_0 + V2_5_0
374
627